diff --git a/llama/llama.go b/llama/llama.go
index 7bcdddef9..f5bddbdc2 100644
--- a/llama/llama.go
+++ b/llama/llama.go
@@ -95,34 +95,24 @@ func (l *LLama) Eval(text string, opts ...PredictOption) error {
 		return fmt.Errorf("inference failed")
 	}
 
-	fmt.Println("hi 4")
-
 	C.llama_free_params(params)
 
-	fmt.Println("hi 5")
-
 	return nil
 }
 
 func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
 	po := NewPredictOptions(opts...)
 
-	fmt.Println("predict 1")
-
 	if po.TokenCallback != nil {
 		setCallback(l.ctx, po.TokenCallback)
 	}
 
-	fmt.Println("predict 2")
-
 	input := C.CString(text)
 	if po.Tokens == 0 {
 		po.Tokens = 99999999
 	}
 	out := make([]byte, po.Tokens)
 
-	fmt.Println("predict 3")
-
 	reverseCount := len(po.StopPrompts)
 	reversePrompt := make([]*C.char, reverseCount)
 	var pass **C.char
@@ -132,8 +122,6 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
 		pass = &reversePrompt[0]
 	}
 
-	fmt.Println("predict 4")
-
 	params := C.llama_allocate_params(input, C.int(po.Seed), C.int(po.Threads), C.int(po.Tokens), C.int(po.TopK),
 		C.float(po.TopP), C.float(po.Temperature), C.float(po.Penalty), C.int(po.Repeat),
 		C.bool(po.IgnoreEOS), C.bool(po.F16KV),
@@ -145,15 +133,12 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
 		C.bool(po.PromptCacheRO),
 	)
 
-	fmt.Println("predict 4.5")
 	ret := C.llama_predict(params, l.ctx, (*C.char)(unsafe.Pointer(&out[0])), C.bool(po.DebugMode))
 	if ret != 0 {
 		return "", fmt.Errorf("inference failed")
 	}
 	res := C.GoString((*C.char)(unsafe.Pointer(&out[0])))
 
-	fmt.Println("predict 5")
-
 	res = strings.TrimPrefix(res, " ")
 	res = strings.TrimPrefix(res, text)
 	res = strings.TrimPrefix(res, "\n")
@@ -162,8 +147,6 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
 		res = strings.TrimRight(res, s)
 	}
 
-	fmt.Println("predict 6")
-
 	C.llama_free_params(params)
 
 	if po.TokenCallback != nil {
diff --git a/server/routes.go b/server/routes.go
index 9490441e5..8796a37be 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -26,7 +26,6 @@ func generate(c *gin.Context) {
 	tokens := 512
 	threads := runtime.NumCPU()
 	// TODO: set prompt from template
-	fmt.Println("Generating text...")
 
 	var req api.GenerateRequest
 	if err := c.ShouldBindJSON(&req); err != nil {
@@ -34,8 +33,6 @@ func generate(c *gin.Context) {
 		return
 	}
 
-	fmt.Println(req)
-
 	l, err := llama.New(req.Model, llama.EnableF16Memory, llama.SetContext(128), llama.EnableEmbeddings, llama.SetGPULayers(gpulayers))
 	if err != nil {
 		fmt.Println("Loading the model failed:", err.Error())