Files
ollama/app/ui/extract.go
Daniel Hiltgen d3b4b9970a app: add code for macOS and Windows apps under 'app' (#12933)
* app: add code for macOS and Windows apps under 'app'

* app: add readme

* app: windows and linux only for now

* ci: fix ui CI validation

---------

Co-authored-by: jmorganca <jmorganca@gmail.com>
2025-11-04 11:40:17 -08:00

85 lines
2.0 KiB
Go

//go:build windows || darwin
package ui
import (
"bytes"
"fmt"
"path/filepath"
"slices"
"strings"
"unicode/utf8"
"github.com/ledongthuc/pdf"
)
// convertBytesToText renders a file's raw bytes as a string, picking the
// strategy from the extension of filename (compared case-insensitively).
//
//   - ".pdf": the text returned by extractPDFText, or a bracketed placeholder
//     when extraction fails or produces only whitespace.
//   - an extension on the fixed binary list below: a bracketed placeholder
//     naming the extension and the byte count; the content is not inspected.
//   - anything else: the bytes verbatim when they are valid UTF-8, otherwise
//     a bracketed placeholder with the byte count.
//
// The function never fails; every problem is reported inside the returned
// string.
func convertBytesToText(data []byte, filename string) string {
	size := len(data)
	suffix := strings.ToLower(filepath.Ext(filename))

	// PDFs are the only format that gets real text extraction.
	if suffix == ".pdf" {
		extracted, err := extractPDFText(data)
		switch {
		case err != nil:
			return fmt.Sprintf("[PDF file - %d bytes - failed to extract text: %v]", size, err)
		case strings.TrimSpace(extracted) == "":
			return fmt.Sprintf("[PDF file - %d bytes - no text content found]", size)
		default:
			return extracted
		}
	}

	// Extensions reported by type and size only, regardless of content.
	isKnownBinary := slices.Contains([]string{
		".xlsx", ".pptx", ".zip", ".tar", ".gz", ".rar",
		".jpg", ".jpeg", ".png", ".gif", ".bmp", ".svg", ".ico",
		".mp3", ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm",
		".exe", ".dll", ".so", ".dylib", ".app", ".dmg", ".pkg",
	}, suffix)

	switch {
	case isKnownBinary:
		return fmt.Sprintf("[Binary file of type %s - %d bytes]", suffix, size)
	case utf8.Valid(data):
		return string(data)
	default:
		// Unknown extension and not decodable as UTF-8: treat as binary.
		return fmt.Sprintf("[Binary file - %d bytes - not valid UTF-8]", size)
	}
}
// extractPDFText returns the plain text of the PDF document held in data.
//
// Pages are visited in order starting at 1. A page is skipped when it is
// null, when its text cannot be extracted, or when its text is only
// whitespace, so one bad page does not discard the rest of the document.
// The first page that contributes text is written as-is; every later
// contributing page is preceded by a "\n\n--- Page N ---\n" separator
// carrying its page number.
//
// A non-nil error is returned when the PDF reader cannot be created or when
// the parser panics while the document is being walked; the returned text is
// empty in both cases. A document with no extractable text yields ("", nil).
func extractPDFText(data []byte) (text string, err error) {
	// data is arbitrary file content. The pdf package resolves objects lazily
	// and can panic on malformed documents instead of returning an error
	// (NOTE(review): confirm against the vendored version), so convert any
	// panic into an ordinary error rather than letting it unwind the caller.
	defer func() {
		if r := recover(); r != nil {
			text = ""
			err = fmt.Errorf("PDF parser panicked: %v", r)
		}
	}()

	pdfReader, err := pdf.NewReader(bytes.NewReader(data), int64(len(data)))
	if err != nil {
		return "", fmt.Errorf("failed to create PDF reader: %w", err)
	}

	var b strings.Builder
	for i, n := 1, pdfReader.NumPage(); i <= n; i++ {
		page := pdfReader.Page(i)
		if page.V.IsNull() {
			continue
		}

		pageText, pageErr := page.GetPlainText(nil)
		if pageErr != nil {
			// Best effort: deliberately drop an unreadable page and keep
			// going so the remaining pages are still returned.
			continue
		}
		if strings.TrimSpace(pageText) == "" {
			continue
		}

		// Only separate from preceding output; the first contributing page
		// gets no header.
		if b.Len() > 0 {
			fmt.Fprintf(&b, "\n\n--- Page %d ---\n", i)
		}
		b.WriteString(pageText)
	}
	return b.String(), nil
}