11 bulan lalu · b73a512f24
--- a/llm/server.go
+++ b/llm/server.go
@@ -89,6 +89,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 
				 
			
 
				 		cpuRunner = serverForCpu()
			
 
				 		gpuCount = 0
			
 
				+		_, _, estimatedTotal = EstimateGPULayers(gpus, ggml, projectors, opts)
			
 
				 	} else {
			
 
				 		if gpus[0].Library == "metal" {
			
 
				 			memInfo, err := gpu.GetCPUMem()
			
--- a/server/routes.go
+++ b/server/routes.go
@@ -1161,6 +1161,14 @@ func (s *Server) ProcessHandler(c *gin.Context) {
 
				 			Details:   modelDetails,
			
 
				 			ExpiresAt: v.expiresAt,
			
 
				 		}
			
 
				+		// The scheduler waits to set expiresAt, so if a model is loading it's
			
 
				+		// possible that it is still the zero time.Time value. For those cases, just
			
 
				+		// calculate the time w/ the sessionDuration instead.
			
 
				+		var epoch time.Time
			
 
				+		if v.expiresAt == epoch {
			
 
				+			mr.ExpiresAt = time.Now().Add(v.sessionDuration)
			
 
				+		}
			
 
				+
			
 
				 		models = append(models, mr)
			
 
				 	}