mirror of
https://github.com/ollama/ollama.git
synced 2025-11-11 18:46:55 +01:00
* app: add code for macOS and Windows apps under 'app' * app: add readme * app: windows and linux only for now * ci: fix ui CI validation --------- Co-authored-by: jmorganca <jmorganca@gmail.com>
85 lines
2.0 KiB
Go
85 lines
2.0 KiB
Go
//go:build windows || darwin
|
|
|
|
package ui
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"path/filepath"
|
|
"slices"
|
|
"strings"
|
|
"unicode/utf8"
|
|
|
|
"github.com/ledongthuc/pdf"
|
|
)
|
|
|
|
// convertBytesToText converts raw file bytes to text based on file extension
|
|
func convertBytesToText(data []byte, filename string) string {
|
|
ext := strings.ToLower(filepath.Ext(filename))
|
|
|
|
if ext == ".pdf" {
|
|
text, err := extractPDFText(data)
|
|
if err != nil {
|
|
return fmt.Sprintf("[PDF file - %d bytes - failed to extract text: %v]", len(data), err)
|
|
}
|
|
if strings.TrimSpace(text) == "" {
|
|
return fmt.Sprintf("[PDF file - %d bytes - no text content found]", len(data))
|
|
}
|
|
return text
|
|
}
|
|
|
|
binaryExtensions := []string{
|
|
".xlsx", ".pptx", ".zip", ".tar", ".gz", ".rar",
|
|
".jpg", ".jpeg", ".png", ".gif", ".bmp", ".svg", ".ico",
|
|
".mp3", ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm",
|
|
".exe", ".dll", ".so", ".dylib", ".app", ".dmg", ".pkg",
|
|
}
|
|
|
|
if slices.Contains(binaryExtensions, ext) {
|
|
return fmt.Sprintf("[Binary file of type %s - %d bytes]", ext, len(data))
|
|
}
|
|
|
|
if utf8.Valid(data) {
|
|
return string(data)
|
|
}
|
|
|
|
// If not valid UTF-8, return a placeholder
|
|
return fmt.Sprintf("[Binary file - %d bytes - not valid UTF-8]", len(data))
|
|
}
|
|
|
|
// extractPDFText extracts text content from PDF bytes
|
|
func extractPDFText(data []byte) (string, error) {
|
|
reader := bytes.NewReader(data)
|
|
pdfReader, err := pdf.NewReader(reader, int64(len(data)))
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to create PDF reader: %w", err)
|
|
}
|
|
|
|
var textBuilder strings.Builder
|
|
numPages := pdfReader.NumPage()
|
|
|
|
for i := 1; i <= numPages; i++ {
|
|
page := pdfReader.Page(i)
|
|
if page.V.IsNull() {
|
|
continue
|
|
}
|
|
|
|
text, err := page.GetPlainText(nil)
|
|
if err != nil {
|
|
// Log the error but continue with other pages
|
|
continue
|
|
}
|
|
|
|
if strings.TrimSpace(text) != "" {
|
|
if textBuilder.Len() > 0 {
|
|
textBuilder.WriteString("\n\n--- Page ")
|
|
textBuilder.WriteString(fmt.Sprintf("%d", i))
|
|
textBuilder.WriteString(" ---\n")
|
|
}
|
|
textBuilder.WriteString(text)
|
|
}
|
|
}
|
|
|
|
return textBuilder.String(), nil
|
|
}
|