瀏覽代碼

llamarunner: Init GGML before printing system info

We currently print system info before the GGML backends are loaded.
This results in only getting information about the default lowest
common denominator runner. If we move up the GGML init then we can
see what we are actually running.

Before:
time=2025-02-14T11:15:07.606-08:00 level=INFO source=runner.go:935 msg=system info="CPU : LLAMAFILE = 1 | CPU : LLAMAFILE = 1 | cgo(gcc)" threads=24

After:
time=2025-02-14T11:16:02.936-08:00 level=INFO source=runner.go:935 msg=system info="CPU : LLAMAFILE = 1 | CPU : LLAMAFILE = 1 | CUDA : ARCHS = 890 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | LLAMAFILE = 1 | cgo(gcc)" threads=24
Jesse Gross 2 月之前
父節點
當前提交
010313bb63
共有 1 個文件被更改,包括 2 次插入和 2 次刪除
  1. 2 2
      runner/llamarunner/runner.go

+ 2 - 2
runner/llamarunner/runner.go

@@ -845,8 +845,6 @@ func (s *Server) loadModel(
 	threads int,
 	multiUserCache bool,
 ) {
-	llama.BackendInit()
-
 	var err error
 	s.model, err = llama.LoadModelFromFile(mpath, params)
 	if err != nil {
@@ -932,6 +930,8 @@ func Execute(args []string) error {
 	})
 	slog.SetDefault(slog.New(handler))
 	slog.Info("starting go runner")
+
+	llama.BackendInit()
 	slog.Info("system", "info", llama.PrintSystemInfo(), "threads", *threads)
 
 	server := &Server{