@@ -89,6 +89,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
cpuRunner = serverForCpu()
gpuCount = 0
+ _, _, estimatedTotal = EstimateGPULayers(gpus, ggml, projectors, opts)
} else {
if gpus[0].Library == "metal" {
memInfo, err := gpu.GetCPUMem()
@@ -1161,6 +1161,14 @@ func (s *Server) ProcessHandler(c *gin.Context) {
Details: modelDetails,
ExpiresAt: v.expiresAt,
}
+ // The scheduler waits to set expiresAt, so if a model is still loading it
+ // may hold the zero time.Time value (year 1, not the Unix epoch). For those
+ // cases, just calculate the time w/ the sessionDuration instead.
+ var epoch time.Time
+ if v.expiresAt == epoch {
+ mr.ExpiresAt = time.Now().Add(v.sessionDuration)
+ }
+
models = append(models, mr)