
Merge pull request #3278 from zhewang1-intc/rebase_ollama_main

Enabling ollama to run on Intel GPUs with SYCL backend
Daniel Hiltgen 11 months ago
parent
commit
646371f56d
7 changed files with 614 additions and 31 deletions
  1. gpu/gpu.go (+93 −31)
  2. gpu/gpu_info.h (+1 −0)
  3. gpu/gpu_info_oneapi.c (+214 −0)
  4. gpu/gpu_info_oneapi.h (+211 −0)
  5. gpu/gpu_oneapi.go (+21 −0)
  6. llm/generate/gen_linux.sh (+30 −0)
  7. llm/generate/gen_windows.ps1 (+44 −0)

+ 93 - 31
gpu/gpu.go

@@ -16,6 +16,7 @@ import (
 	"os"
 	"os"
 	"path/filepath"
 	"path/filepath"
 	"runtime"
 	"runtime"
+	"strconv"
 	"strings"
 	"strings"
 	"sync"
 	"sync"
 	"unsafe"
 	"unsafe"
@@ -28,6 +29,7 @@ type handles struct {
 	deviceCount int
 	cudart      *C.cudart_handle_t
 	nvcuda      *C.nvcuda_handle_t
+	oneapi      *C.oneapi_handle_t
 }
 
 const (
@@ -80,6 +82,15 @@ var NvcudaWindowsGlobs = []string{
 	"c:\\windows\\system*\\nvcuda.dll",
 	"c:\\windows\\system*\\nvcuda.dll",
 }
 }
 
 
+var OneapiWindowsGlobs = []string{
+	"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
+}
+
+var OneapiLinuxGlobs = []string{
+	"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
+	"/usr/lib*/libze_intel_gpu.so*",
+}
+
 // Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
 // Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
 var CudaTegra string = os.Getenv("JETSON_JETPACK")
@@ -94,6 +105,8 @@ func initGPUHandles() *handles {
 	var cudartMgmtPatterns []string
 	var nvcudaMgmtName string
 	var nvcudaMgmtPatterns []string
+	var oneapiMgmtName string
+	var oneapiMgmtPatterns []string
 
 	tmpDir, _ := PayloadsDir()
 	switch runtime.GOOS {
@@ -105,6 +118,8 @@ func initGPUHandles() *handles {
 		// Aligned with driver, we can't carry as payloads
 		nvcudaMgmtName = "nvcuda.dll"
 		nvcudaMgmtPatterns = NvcudaWindowsGlobs
+		oneapiMgmtName = "ze_intel_gpu64.dll"
+		oneapiMgmtPatterns = OneapiWindowsGlobs
 	case "linux":
 	case "linux":
 		cudartMgmtName = "libcudart.so*"
 		cudartMgmtName = "libcudart.so*"
 		if tmpDir != "" {
 		if tmpDir != "" {
@@ -115,6 +130,8 @@ func initGPUHandles() *handles {
 		// Aligned with driver, we can't carry as payloads
 		nvcudaMgmtName = "libcuda.so*"
 		nvcudaMgmtPatterns = NvcudaLinuxGlobs
+		oneapiMgmtName = "libze_intel_gpu.so"
+		oneapiMgmtPatterns = OneapiLinuxGlobs
 	default:
 		return gpuHandles
 	}
@@ -141,6 +158,18 @@ func initGPUHandles() *handles {
 			return gpuHandles
 		}
 	}
+
+	oneapiLibPaths := FindGPULibs(oneapiMgmtName, oneapiMgmtPatterns)
+	if len(oneapiLibPaths) > 0 {
+		deviceCount, oneapi, libPath := LoadOneapiMgmt(oneapiLibPaths)
+		if oneapi != nil {
+			slog.Debug("detected Intel GPUs", "library", libPath, "count", deviceCount)
+			gpuHandles.oneapi = oneapi
+			gpuHandles.deviceCount = deviceCount
+			return gpuHandles
+		}
+	}
+
 	return gpuHandles
 }
 
@@ -181,39 +210,53 @@ func GetGPUInfo() GpuInfoList {
 		if cpuVariant == "" && runtime.GOARCH == "amd64" {
 			continue
 		}
-		gpuInfo := GpuInfo{
-			Library: "cuda",
-		}
-		var driverMajor int
-		var driverMinor int
-		if gpuHandles.cudart != nil {
-			C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
-		} else {
-			C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
-			driverMajor = int(gpuHandles.nvcuda.driver_major)
-			driverMinor = int(gpuHandles.nvcuda.driver_minor)
-		}
-		if memInfo.err != nil {
-			slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
-			C.free(unsafe.Pointer(memInfo.err))
-			continue
+		if gpuHandles.cudart != nil || gpuHandles.nvcuda != nil {
+			gpuInfo := GpuInfo{
+				Library: "cuda",
+			}
+			var driverMajor int
+			var driverMinor int
+			if gpuHandles.cudart != nil {
+				C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
+			} else {
+				C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
+				driverMajor = int(gpuHandles.nvcuda.driver_major)
+				driverMinor = int(gpuHandles.nvcuda.driver_minor)
+			}
+			if memInfo.err != nil {
+				slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
+				C.free(unsafe.Pointer(memInfo.err))
+				continue
+			}
+			if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
+				slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
+				continue
+			}
+			gpuInfo.TotalMemory = uint64(memInfo.total)
+			gpuInfo.FreeMemory = uint64(memInfo.free)
+			gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
+			gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
+			gpuInfo.MinimumMemory = cudaMinimumMemory
+			gpuInfo.DependencyPath = depPath
+			gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
+			gpuInfo.DriverMajor = int(driverMajor)
+			gpuInfo.DriverMinor = int(driverMinor)
+
+			// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
+			resp = append(resp, gpuInfo)
 		}
-		if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
-			slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
-			continue
+		if gpuHandles.oneapi != nil {
+			gpuInfo := GpuInfo{
+				Library: "oneapi",
+			}
+			C.oneapi_check_vram(*gpuHandles.oneapi, &memInfo)
+			if memInfo.err != nil {
+				slog.Info("error looking up OneAPI GPU memory", "error", C.GoString(memInfo.err))
+				C.free(unsafe.Pointer(memInfo.err))
+				continue
+			}
+			// Work-around: leave 5% of reported free VRAM in reserve for the
+			// MKL library used by the ggml-sycl backend.
+			totalFreeMem := float64(memInfo.free) * 0.95
+			memInfo.free = C.uint64_t(totalFreeMem)
+			gpuInfo.TotalMemory = uint64(memInfo.total)
+			gpuInfo.FreeMemory = uint64(memInfo.free)
+			gpuInfo.ID = strconv.Itoa(i)
+			resp = append(resp, gpuInfo)
 		}
-		gpuInfo.TotalMemory = uint64(memInfo.total)
-		gpuInfo.FreeMemory = uint64(memInfo.free)
-		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
-		gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
-		gpuInfo.MinimumMemory = cudaMinimumMemory
-		gpuInfo.DependencyPath = depPath
-		gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
-		gpuInfo.DriverMajor = int(driverMajor)
-		gpuInfo.DriverMinor = int(driverMinor)
-
-		// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
-		resp = append(resp, gpuInfo)
 	}
 
 	// Then AMD
@@ -348,6 +391,23 @@ func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
 	return 0, nil, ""
 }
 
+func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
+	var resp C.oneapi_init_resp_t
+	resp.oh.verbose = getVerboseState()
+	for _, libPath := range oneapiLibPaths {
+		lib := C.CString(libPath)
+		defer C.free(unsafe.Pointer(lib))
+		C.oneapi_init(lib, &resp)
+		if resp.err != nil {
+			slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
+			C.free(unsafe.Pointer(resp.err))
+		} else {
+			return int(resp.num_devices), &resp.oh, libPath
+		}
+	}
+	return 0, nil, ""
+}
+
 func getVerboseState() C.uint16_t {
 	if envconfig.Debug {
 		return C.uint16_t(1)
@@ -368,6 +428,8 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
 		return cudaGetVisibleDevicesEnv(l)
 	case "rocm":
 		return rocmGetVisibleDevicesEnv(l)
+	case "oneapi":
+		return oneapiGetVisibleDevicesEnv(l)
 	default:
 		slog.Debug("no filter required for library " + l[0].Library)
 		return "", ""

+ 1 - 0
gpu/gpu_info.h

@@ -62,6 +62,7 @@ void cpu_check_ram(mem_info_t *resp);
 
 #include "gpu_info_cudart.h"
 #include "gpu_info_nvcuda.h"
+#include "gpu_info_oneapi.h"
 
 #endif  // __GPU_INFO_H__
 #endif  // __APPLE__

+ 214 - 0
gpu/gpu_info_oneapi.c

@@ -0,0 +1,214 @@
+#ifndef __APPLE__
+
+#include "gpu_info_oneapi.h"
+
+#include <string.h>
+
+void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
+{
+  ze_result_t ret;
+  resp->err = NULL;
+  const int buflen = 256;
+  char buf[buflen + 1];
+  int i;
+  struct lookup
+  {
+    char *s;
+    void **p;
+  } l[] = {
+      {"zesInit", (void *)&resp->oh.zesInit},
+      {"zesDriverGet", (void *)&resp->oh.zesDriverGet},
+      {"zesDeviceGet", (void *)&resp->oh.zesDeviceGet},
+      {"zesDeviceGetProperties", (void *)&resp->oh.zesDeviceGetProperties},
+      {"zesDeviceEnumMemoryModules",
+       (void *)&resp->oh.zesDeviceEnumMemoryModules},
+      {"zesMemoryGetProperties", (void *)&resp->oh.zesMemoryGetProperties},
+      {"zesMemoryGetState", (void *)&resp->oh.zesMemoryGetState},
+      {NULL, NULL},
+  };
+
+  resp->oh.handle = LOAD_LIBRARY(oneapi_lib_path, RTLD_LAZY);
+  if (!resp->oh.handle)
+  {
+    char *msg = LOAD_ERR();
+    snprintf(buf, buflen,
+             "Unable to load %s library to query for Intel GPUs: %s\n",
+             oneapi_lib_path, msg);
+    free(msg);
+    resp->err = strdup(buf);
+    return;
+  }
+
+  // TODO once we've squashed the remaining corner cases remove this log
+  LOG(resp->oh.verbose,
+      "wiring Level-Zero management library functions in %s\n",
+      oneapi_lib_path);
+
+  for (i = 0; l[i].s != NULL; i++)
+  {
+    // TODO once we've squashed the remaining corner cases remove this log
+    LOG(resp->oh.verbose, "dlsym: %s\n", l[i].s);
+
+    *l[i].p = LOAD_SYMBOL(resp->oh.handle, l[i].s);
+    if (!*(l[i].p))
+    {
+      char *msg = LOAD_ERR();
+      LOG(resp->oh.verbose, "dlerr: %s\n", msg);
+      UNLOAD_LIBRARY(resp->oh.handle);
+      resp->oh.handle = NULL;
+      snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, msg);
+      free(msg);
+      resp->err = strdup(buf);
+      return;
+    }
+  }
+
+  ret = (*resp->oh.zesInit)(0);
+  if (ret != ZE_RESULT_SUCCESS)
+  {
+    LOG(resp->oh.verbose, "zesInit err: %d\n", ret);
+    UNLOAD_LIBRARY(resp->oh.handle);
+    resp->oh.handle = NULL;
+    snprintf(buf, buflen, "oneapi vram init failure: %d", ret);
+    resp->err = strdup(buf);
+    return;
+  }
+
+  // zesDriverGet takes a uint32_t count; copy it into the int field after.
+  uint32_t num_drivers = 0;
+  (*resp->oh.zesDriverGet)(&num_drivers, NULL);
+  resp->num_devices = (int)num_drivers;
+
+  return;
+}
+
+void oneapi_check_vram(oneapi_handle_t h, mem_info_t *resp)
+{
+  ze_result_t ret;
+  resp->err = NULL;
+  const int buflen = 256;
+  char buf[buflen + 1];
+  int i, d, m;
+
+  if (h.handle == NULL)
+  {
+    resp->err = strdup("Level-Zero handle not initialized");
+    return;
+  }
+
+  uint32_t driversCount = 0;
+  ret = (*h.zesDriverGet)(&driversCount, NULL);
+  if (ret != ZE_RESULT_SUCCESS)
+  {
+    snprintf(buf, buflen, "unable to get driver count: %d", ret);
+    resp->err = strdup(buf);
+    return;
+  }
+  LOG(h.verbose, "discovered %d Level-Zero drivers\n", driversCount);
+
+  zes_driver_handle_t *allDrivers =
+      malloc(driversCount * sizeof(zes_driver_handle_t));
+  (*h.zesDriverGet)(&driversCount, allDrivers);
+
+  resp->total = 0;
+  resp->free = 0;
+
+  for (d = 0; d < driversCount; d++)
+  {
+    uint32_t deviceCount = 0;
+    ret = (*h.zesDeviceGet)(allDrivers[d], &deviceCount, NULL);
+    if (ret != ZE_RESULT_SUCCESS)
+    {
+      snprintf(buf, buflen, "unable to get device count: %d", ret);
+      resp->err = strdup(buf);
+      free(allDrivers);
+      return;
+    }
+
+    LOG(h.verbose, "discovered %d Level-Zero devices\n", deviceCount);
+
+    zes_device_handle_t *devices =
+        malloc(deviceCount * sizeof(zes_device_handle_t));
+    (*h.zesDeviceGet)(allDrivers[d], &deviceCount, devices);
+
+    for (i = 0; i < deviceCount; i++)
+    {
+      zes_device_ext_properties_t ext_props;
+      ext_props.stype = ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES;
+      ext_props.pNext = NULL;
+
+      zes_device_properties_t props;
+      props.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES;
+      props.pNext = &ext_props;
+
+      ret = (*h.zesDeviceGetProperties)(devices[i], &props);
+      if (ret != ZE_RESULT_SUCCESS)
+      {
+        snprintf(buf, buflen, "unable to get device properties: %d", ret);
+        resp->err = strdup(buf);
+        free(allDrivers);
+        free(devices);
+        return;
+      }
+
+      if (h.verbose)
+      {
+        // When in verbose mode, report more information about
+        // the card we discover.
+        LOG(h.verbose, "[%d] oneAPI device name: %s\n", i,
+            props.modelName);
+        LOG(h.verbose, "[%d] oneAPI brand: %s\n", i,
+            props.brandName);
+        LOG(h.verbose, "[%d] oneAPI vendor: %s\n", i,
+            props.vendorName);
+        LOG(h.verbose, "[%d] oneAPI S/N: %s\n", i,
+            props.serialNumber);
+        LOG(h.verbose, "[%d] oneAPI board number: %s\n", i,
+            props.boardNumber);
+      }
+
+      uint32_t memCount = 0;
+      ret = (*h.zesDeviceEnumMemoryModules)(devices[i], &memCount, NULL);
+      if (ret != ZE_RESULT_SUCCESS)
+      {
+        snprintf(buf, buflen,
+                 "unable to enumerate Level-Zero memory modules: %d", ret);
+        resp->err = strdup(buf);
+        free(allDrivers);
+        free(devices);
+        return;
+      }
+
+      LOG(h.verbose, "discovered %d Level-Zero memory modules\n", memCount);
+
+      zes_mem_handle_t *mems = malloc(memCount * sizeof(zes_mem_handle_t));
+      (*h.zesDeviceEnumMemoryModules)(devices[i], &memCount, mems);
+
+      for (m = 0; m < memCount; m++)
+      {
+        zes_mem_state_t state;
+        state.stype = ZES_STRUCTURE_TYPE_MEM_STATE;
+        state.pNext = NULL;
+        ret = (*h.zesMemoryGetState)(mems[m], &state);
+        if (ret != ZE_RESULT_SUCCESS)
+        {
+          snprintf(buf, buflen, "unable to get memory state: %d", ret);
+          resp->err = strdup(buf);
+          free(allDrivers);
+          free(devices);
+          free(mems);
+          return;
+        }
+
+        resp->total += state.size;
+        resp->free += state.free;
+      }
+
+      free(mems);
+    }
+
+    free(devices);
+  }
+
+  free(allDrivers);
+}
+
+#endif // __APPLE__
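All of the zes* enumeration calls above use Level-Zero's count-then-fill convention: call once with a NULL buffer to learn how many handles exist, allocate, then call again to fill the buffer. A small Go sketch of the same convention, with a hypothetical driverGet standing in for the real API:

```go
package main

import "fmt"

// driverGet mimics the zesDriverGet calling convention: with a nil buffer it
// only reports the count; with a buffer it fills up to *count entries and
// writes back how many it actually filled.
func driverGet(count *uint32, handles []uintptr) {
	available := []uintptr{0xa1, 0xa2} // pretend two drivers exist
	if handles == nil {
		*count = uint32(len(available))
		return
	}
	n := int(*count)
	if n > len(available) {
		n = len(available)
	}
	*count = uint32(copy(handles, available[:n]))
}

func main() {
	var n uint32
	driverGet(&n, nil)            // first call: how many drivers?
	drivers := make([]uintptr, n) // allocate exactly that many
	driverGet(&n, drivers)        // second call: fill them in
	fmt.Println("drivers:", drivers)
}
```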

+ 211 - 0
gpu/gpu_info_oneapi.h

@@ -0,0 +1,211 @@
+#ifndef __APPLE__
+#ifndef __GPU_INFO_ONEAPI_H__
+#define __GPU_INFO_ONEAPI_H__
+#include "gpu_info.h"
+
+#define ZE_MAX_DEVICE_NAME 256
+#define ZE_MAX_DEVICE_UUID_SIZE 16
+#define ZES_STRING_PROPERTY_SIZE 64
+#define ZE_BIT(_i) (1 << _i)
+
+// Just enough typedef's to dlopen/dlsym for memory information
+typedef enum ze_result_t
+{
+  ZE_RESULT_SUCCESS = 0,
+  // Other values omitted for now...
+} ze_result_t;
+
+typedef uint8_t ze_bool_t;
+typedef struct _zes_driver_handle_t *zes_driver_handle_t;
+typedef struct _zes_device_handle_t *zes_device_handle_t;
+typedef struct _zes_mem_handle_t *zes_mem_handle_t;
+
+typedef enum _ze_structure_type_t
+{
+  ZE_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff
+} ze_structure_type_t;
+
+typedef enum _zes_structure_type_t
+{
+  ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES = 0x1,
+  ZES_STRUCTURE_TYPE_MEM_PROPERTIES = 0xb,
+  ZES_STRUCTURE_TYPE_MEM_STATE = 0x1e,
+  ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES = 0x2d,
+  ZES_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff
+} zes_structure_type_t;
+
+typedef enum _zes_mem_type_t
+{
+  ZES_MEM_TYPE_FORCE_UINT32 = 0x7fffffff
+} zes_mem_type_t;
+
+typedef enum _zes_mem_loc_t
+{
+  ZES_MEM_LOC_SYSTEM = 0,
+  ZES_MEM_LOC_DEVICE = 1,
+  ZES_MEM_LOC_FORCE_UINT32 = 0x7fffffff
+} zes_mem_loc_t;
+
+typedef enum _zes_mem_health_t
+{
+  ZES_MEM_HEALTH_FORCE_UINT32 = 0x7fffffff
+} zes_mem_health_t;
+
+typedef struct _ze_device_uuid_t
+{
+  uint8_t id[ZE_MAX_DEVICE_UUID_SIZE];
+} ze_device_uuid_t;
+
+typedef struct _zes_uuid_t
+{
+  uint8_t id[ZE_MAX_DEVICE_UUID_SIZE];
+} zes_uuid_t;
+
+typedef enum _ze_device_type_t
+{
+  ZE_DEVICE_TYPE_GPU = 1,
+  ZE_DEVICE_TYPE_CPU = 2,
+  ZE_DEVICE_TYPE_FPGA = 3,
+  ZE_DEVICE_TYPE_MCA = 4,
+  ZE_DEVICE_TYPE_VPU = 5,
+  ZE_DEVICE_TYPE_FORCE_UINT32 = 0x7fffffff
+} ze_device_type_t;
+
+typedef enum _zes_device_type_t
+{
+  ZES_DEVICE_TYPE_GPU = 1,
+  ZES_DEVICE_TYPE_CPU = 2,
+  ZES_DEVICE_TYPE_FPGA = 3,
+  ZES_DEVICE_TYPE_MCA = 4,
+  ZES_DEVICE_TYPE_VPU = 5,
+  ZES_DEVICE_TYPE_FORCE_UINT32 = 0x7fffffff
+} zes_device_type_t;
+
+typedef uint32_t ze_device_property_flags_t;
+typedef enum _ze_device_property_flag_t
+{
+  ZE_DEVICE_PROPERTY_FLAG_INTEGRATED = ZE_BIT(0),
+  ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE = ZE_BIT(1),
+  ZE_DEVICE_PROPERTY_FLAG_ECC = ZE_BIT(2),
+  ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING = ZE_BIT(3),
+  ZE_DEVICE_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff
+} ze_device_property_flag_t;
+
+typedef uint32_t zes_device_property_flags_t;
+typedef enum _zes_device_property_flag_t
+{
+  ZES_DEVICE_PROPERTY_FLAG_INTEGRATED = ZE_BIT(0),
+  ZES_DEVICE_PROPERTY_FLAG_SUBDEVICE = ZE_BIT(1),
+  ZES_DEVICE_PROPERTY_FLAG_ECC = ZE_BIT(2),
+  ZES_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING = ZE_BIT(3),
+  ZES_DEVICE_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff
+} zes_device_property_flag_t;
+
+typedef struct _ze_device_properties_t
+{
+  ze_structure_type_t stype;
+  void *pNext;
+  ze_device_type_t type;
+  uint32_t vendorId;
+  uint32_t deviceId;
+  ze_device_property_flags_t flags;
+  uint32_t subdeviceId;
+  uint32_t coreClockRate;
+  uint64_t maxMemAllocSize;
+  uint32_t maxHardwareContexts;
+  uint32_t maxCommandQueuePriority;
+  uint32_t numThreadsPerEU;
+  uint32_t physicalEUSimdWidth;
+  uint32_t numEUsPerSubslice;
+  uint32_t numSubslicesPerSlice;
+  uint32_t numSlices;
+  uint64_t timerResolution;
+  uint32_t timestampValidBits;
+  uint32_t kernelTimestampValidBits;
+  ze_device_uuid_t uuid;
+  char name[ZE_MAX_DEVICE_NAME];
+} ze_device_properties_t;
+
+typedef struct _zes_device_properties_t
+{
+  zes_structure_type_t stype;
+  void *pNext;
+  ze_device_properties_t core;
+  uint32_t numSubdevices;
+  char serialNumber[ZES_STRING_PROPERTY_SIZE];
+  char boardNumber[ZES_STRING_PROPERTY_SIZE];
+  char brandName[ZES_STRING_PROPERTY_SIZE];
+  char modelName[ZES_STRING_PROPERTY_SIZE];
+  char vendorName[ZES_STRING_PROPERTY_SIZE];
+  char driverVersion[ZES_STRING_PROPERTY_SIZE];
+} zes_device_properties_t;
+
+typedef struct _zes_device_ext_properties_t
+{
+  zes_structure_type_t stype;
+  void *pNext;
+  zes_uuid_t uuid;
+  zes_device_type_t type;
+  zes_device_property_flags_t flags;
+} zes_device_ext_properties_t;
+
+typedef struct _zes_mem_properties_t
+{
+  zes_structure_type_t stype;
+  void *pNext;
+  zes_mem_type_t type;
+  ze_bool_t onSubdevice;
+  uint32_t subdeviceId;
+  zes_mem_loc_t location;
+  uint64_t physicalSize;
+  int32_t busWidth;
+  int32_t numChannels;
+} zes_mem_properties_t;
+
+typedef struct _zes_mem_state_t
+{
+  zes_structure_type_t stype;
+  const void *pNext;
+  zes_mem_health_t health;
+  uint64_t free;
+  uint64_t size;
+} zes_mem_state_t;
+
+typedef struct oneapi_handle
+{
+  void *handle;
+  uint16_t verbose;
+  ze_result_t (*zesInit)(int);
+  ze_result_t (*zesDriverGet)(uint32_t *pCount, zes_driver_handle_t *phDrivers);
+  ze_result_t (*zesDeviceGet)(zes_driver_handle_t hDriver, uint32_t *pCount,
+                              zes_device_handle_t *phDevices);
+  ze_result_t (*zesDeviceGetProperties)(zes_device_handle_t hDevice,
+                                        zes_device_properties_t *pProperties);
+  ze_result_t (*zesDeviceEnumMemoryModules)(zes_device_handle_t hDevice,
+                                            uint32_t *pCount,
+                                            zes_mem_handle_t *phMemory);
+  ze_result_t (*zesMemoryGetProperties)(zes_mem_handle_t hMemory,
+                                        zes_mem_properties_t *pProperties);
+  ze_result_t (*zesMemoryGetState)(zes_mem_handle_t hMemory,
+                                   zes_mem_state_t *pState);
+
+} oneapi_handle_t;
+
+typedef struct oneapi_init_resp
+{
+  char *err; // If err is non-null handle is invalid
+  int num_devices;
+  oneapi_handle_t oh;
+} oneapi_init_resp_t;
+
+typedef struct oneapi_version_resp
+{
+  ze_result_t status;
+  char *str; // Contains version or error string if status != 0
+} oneapi_version_resp_t;
+
+void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp);
+void oneapi_check_vram(oneapi_handle_t rh, mem_info_t *resp);
+
+#endif // __GPU_INFO_ONEAPI_H__
+#endif // __APPLE__
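The header mirrors only the zes_mem_state_t fields that oneapi_check_vram actually reads (free and size), which the C code sums over every memory module of every device. A sketch of that aggregation in Go, using a stand-in struct rather than the real binding:

```go
package main

import "fmt"

// memState mirrors the two zes_mem_state_t fields the C code reads.
type memState struct {
	free uint64
	size uint64
}

// aggregate sums memory-module states the way oneapi_check_vram does.
func aggregate(modules []memState) (total, free uint64) {
	for _, m := range modules {
		total += m.size
		free += m.free
	}
	return
}

func main() {
	mods := []memState{ // made-up modules on one device
		{free: 6 << 30, size: 8 << 30},
		{free: 7 << 30, size: 8 << 30},
	}
	total, free := aggregate(mods)
	fmt.Printf("total: %d GiB, free: %d GiB\n", total>>30, free>>30)
}
```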

+ 21 - 0
gpu/gpu_oneapi.go

@@ -0,0 +1,21 @@
+//go:build linux || windows
+
+package gpu
+
+import (
+	"log/slog"
+	"strings"
+)
+
+func oneapiGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
+	ids := []string{}
+	for _, info := range gpuInfo {
+		if info.Library != "oneapi" {
+			// TODO shouldn't happen if things are wired correctly...
+			slog.Debug("oneapiGetVisibleDevicesEnv skipping over non-sycl device", "library", info.Library)
+			continue
+		}
+		ids = append(ids, info.ID)
+	}
+	return "ONEAPI_DEVICE_SELECTOR", "level_zero:" + strings.Join(ids, ",")
+}
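The key/value pair returned here is meant to be placed into the environment of the llama server subprocess so the SYCL runtime only sees the selected Level-Zero devices. A hedged sketch of how a caller could apply it (the exec wiring is assumed, not shown in this diff; `env` stands in for the real server binary):

```go
package main

import (
	"fmt"
	"os"
	"os/exec"
	"strings"
)

func main() {
	// Pretend oneapiGetVisibleDevicesEnv selected devices 0 and 1.
	key := "ONEAPI_DEVICE_SELECTOR"
	val := "level_zero:" + strings.Join([]string{"0", "1"}, ",")

	cmd := exec.Command("env") // stand-in for the real server binary
	cmd.Env = append(os.Environ(), key+"="+val)
	out, err := cmd.Output()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	for _, line := range strings.Split(string(out), "\n") {
		if strings.HasPrefix(line, key+"=") {
			fmt.Println(line) // ONEAPI_DEVICE_SELECTOR=level_zero:0,1
		}
	}
}
```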

+ 30 - 0
llm/generate/gen_linux.sh

@@ -206,6 +206,36 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
 
 fi
 
+if [ -z "${ONEAPI_ROOT}" ]; then
+    # Try the default location in case it exists
+    ONEAPI_ROOT=/opt/intel/oneapi
+fi
+
+if [ -d "${ONEAPI_ROOT}" ]; then
+    echo "oneAPI libraries detected - building dynamic oneAPI library"
+    init_vars
+    source ${ONEAPI_ROOT}/setvars.sh --force # set up environment variables for oneAPI
+    CC=icx
+    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL=ON -DLLAMA_SYCL_F16=OFF"
+    BUILD_DIR="../build/linux/${ARCH}/oneapi"
+    EXTRA_LIBS="-fsycl -Wl,-rpath,${ONEAPI_ROOT}/compiler/latest/lib,-rpath,${ONEAPI_ROOT}/mkl/latest/lib,-rpath,${ONEAPI_ROOT}/tbb/latest/lib,-rpath,${ONEAPI_ROOT}/compiler/latest/opt/oclfpga/linux64/lib -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
+    DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it
+    build
+
+    # copy oneAPI dependencies
+    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do
+        cp "${dep}" "${BUILD_DIR}/bin/"
+    done
+    cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${BUILD_DIR}/bin/"
+    cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${BUILD_DIR}/bin/"
+    cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${BUILD_DIR}/bin/"
+    cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${BUILD_DIR}/bin/"
+    cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${BUILD_DIR}/bin/"
+    cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${BUILD_DIR}/bin/"
+    cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${BUILD_DIR}/bin/"
+    compress
+fi
+
 if [ -z "${ROCM_PATH}" ]; then
     # Try the default location in case it exists
     ROCM_PATH=/opt/rocm
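For readers less fluent in the shell pipeline above: the dependency-copy loop runs ldd on the freshly built server, keeps any resolved library whose path mentions sycl, mkl, or tbb, and copies it next to the binary. A rough Go equivalent, for illustration only (the path follows the script's layout, with "x86_64" standing in for ${ARCH}):

```go
package main

import (
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
)

func main() {
	bin := "../build/linux/x86_64/oneapi/bin/ollama_llama_server"

	out, err := exec.Command("ldd", bin).Output()
	if err != nil {
		fmt.Fprintln(os.Stderr, "ldd failed:", err)
		return
	}
	for _, line := range strings.Split(string(out), "\n") {
		// ldd lines look like: "libmkl_core.so => /opt/.../libmkl_core.so (0x...)"
		parts := strings.SplitN(line, "=>", 2)
		if len(parts) < 2 {
			continue
		}
		fields := strings.Fields(parts[1])
		if len(fields) == 0 {
			continue
		}
		path := fields[0]
		if !strings.Contains(path, "sycl") &&
			!strings.Contains(path, "mkl") &&
			!strings.Contains(path, "tbb") {
			continue
		}
		dst := filepath.Join(filepath.Dir(bin), filepath.Base(path))
		if err := copyFile(path, dst); err != nil {
			fmt.Fprintln(os.Stderr, err)
		}
	}
}

func copyFile(src, dst string) error {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()
	out, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer out.Close()
	_, err = io.Copy(out, in)
	return err
}
```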

+ 44 - 0
llm/generate/gen_windows.ps1

@@ -289,6 +289,49 @@ function build_cuda() {
     }
 }
 
+function build_oneapi() {
+  if ((-not "${env:OLLAMA_SKIP_ONEAPI_GENERATE}") -and ("${env:ONEAPI_ROOT}")) {
+    # Get oneAPI version
+    $script:ONEAPI_VERSION = icpx --version
+    $script:ONEAPI_VERSION = [regex]::Match($script:ONEAPI_VERSION, '(?<=oneAPI DPC\+\+/C\+\+ Compiler )(?<version>\d+\.\d+\.\d+)').Value
+    if ($script:ONEAPI_VERSION) {
+      $script:ONEAPI_VARIANT = "_v" + $script:ONEAPI_VERSION
+    }
+    init_vars
+    $script:buildDir = "../build/windows/${script:ARCH}/oneapi$script:ONEAPI_VARIANT"
+    $script:distDir ="$script:DIST_BASE\oneapi$script:ONEAPI_VARIANT"
+    $script:cmakeDefs += @(
+      "-G", "MinGW Makefiles",
+      "-DLLAMA_SYCL=ON",
+      "-DCMAKE_C_COMPILER=icx",
+      "-DCMAKE_CXX_COMPILER=icx",
+      "-DCMAKE_BUILD_TYPE=Release"
+    )
+
+    Write-Host "Building oneAPI"
+    build
+    # Ninja doesn't prefix with config name
+    if ($null -ne $script:DUMPBIN) {
+      & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | Select-String ".dll"
+    }
+    sign
+    install
+
+    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:distDir}"
+    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:distDir}"
+    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:distDir}"
+    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:distDir}"
+    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:distDir}"
+    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:distDir}"
+    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:distDir}"
+    cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:distDir}"
+    cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:distDir}"
+    cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:distDir}"
+  } else {
+    Write-Host "Skipping oneAPI generation step"
+  }
+}
+
 function build_rocm() {
     if ((-not "${env:OLLAMA_SKIP_ROCM_GENERATE}") -and ("${env:HIP_PATH}")) {
         $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
@@ -356,6 +399,7 @@ if ($($args.count) -eq 0) {
         build_cpu_avx
         build_cpu_avx2
         build_cuda
+        build_oneapi
         build_rocm
     }
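Finally, the version sniffing in build_oneapi parses the icpx banner with a regex to build the payload directory suffix. The same extraction in Go, run against a sample banner (the sample text is illustrative, not captured compiler output):

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Illustrative banner shape; real icpx output may differ.
	sample := "Intel(R) oneAPI DPC++/C++ Compiler 2024.1.0 (2024.1.0.20240308)"

	re := regexp.MustCompile(`oneAPI DPC\+\+/C\+\+ Compiler (\d+\.\d+\.\d+)`)
	if m := re.FindStringSubmatch(sample); m != nil {
		fmt.Println("variant suffix:", "_v"+m[1]) // _v2024.1.0
	} else {
		fmt.Println("no version found; leaving the variant suffix empty")
	}
}
```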