0018-add-phi4-support.patch

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: jmorganca <jmorganca@gmail.com>
Date: Thu, 27 Feb 2025 15:12:26 -0800
Subject: [PATCH] add phi4 support

---
 include/llama.h     |  1 +
 src/llama-model.cpp | 10 +++++++---
 src/llama-vocab.cpp | 11 +++++++++++
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/include/llama.h b/include/llama.h
index cc948005..16774711 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -105,6 +105,7 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_CHAMELEON      = 26,
         LLAMA_VOCAB_PRE_TYPE_MINERVA        = 27,
         LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM  = 28,
+        LLAMA_VOCAB_PRE_TYPE_GPT4O          = 29,
     };
 
     enum llama_rope_type {
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 21819080..ab1a07d1 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -2283,7 +2283,11 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
 
                     // output
                     output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
-                    output      = create_tensor(tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}, 0);
+                    output      = create_tensor(tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+                    // if output is NULL, init from the input tok embed
+                    if (output == NULL) {
+                        output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                    }
 
                     for (int i = 0; i < n_layer; ++i) {
                         auto & layer = layers[i];
@@ -2298,8 +2302,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0);
                         layer.ffn_up   = create_tensor(tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd, 2 * n_ff}, 0);
 
-                        layer.rope_long  = create_tensor(tn(LLM_TENSOR_ROPE_FACTORS_LONG,  "weight", i), {n_embd_head/2}, TENSOR_NOT_REQUIRED | (i != 0 ? TENSOR_DUPLICATED : 0));
-                        layer.rope_short = create_tensor(tn(LLM_TENSOR_ROPE_FACTORS_SHORT, "weight", i), {n_embd_head/2}, TENSOR_NOT_REQUIRED | (i != 0 ? TENSOR_DUPLICATED : 0));
+                        layer.rope_long  = create_tensor(tn(LLM_TENSOR_ROPE_FACTORS_LONG,  "weight", i), {n_rot/2}, TENSOR_NOT_REQUIRED | (i != 0 ? TENSOR_DUPLICATED : 0));
+                        layer.rope_short = create_tensor(tn(LLM_TENSOR_ROPE_FACTORS_SHORT, "weight", i), {n_rot/2}, TENSOR_NOT_REQUIRED | (i != 0 ? TENSOR_DUPLICATED : 0));
                     }
                 } break;
             case LLM_ARCH_PHIMOE:
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 1ca827eb..c7ff28be 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -392,6 +392,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                     "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
                 };
                 break;
+            case LLAMA_VOCAB_PRE_TYPE_GPT4O:
+                // original regex from tokenizer.json
+                // [^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+
+                regex_exprs = {
+                    "[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
+                };
+                break;
             default:
                 // default regex for BPE tokenization pre-processing
                 regex_exprs = {
@@ -1583,6 +1590,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             } else if (
                 tokenizer_pre == "megrez") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_QWEN2;
+            } else if (
+                tokenizer_pre == "gpt-4o") {
+                pre_type = LLAMA_VOCAB_PRE_TYPE_GPT4O;
+                clean_spaces = false;
             } else {
                 LLAMA_LOG_WARN("%s: missing or unrecognized pre-tokenizer type, using: 'default'\n", __func__);
                 pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
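
A quick way to confirm the new gpt-4o pre-tokenizer is picked up after applying this patch is to load a converted Phi-4 GGUF in vocab-only mode and tokenize a short string through the public C API. The sketch below is illustrative and not part of the patch: the model path is a placeholder, and the function names and signatures (llama_model_load_from_file, llama_model_get_vocab, llama_tokenize taking a llama_vocab pointer) assume the llama.cpp API at roughly the point in the tree this patch targets; adjust for other revisions.

    // check_phi4_tokenizer.cpp -- illustrative only, not part of the patch.
    // Assumes the llama.cpp C API where llama_tokenize takes a llama_vocab *.
    #include "llama.h"

    #include <cstdio>
    #include <cstring>
    #include <vector>

    int main() {
        llama_backend_init();

        llama_model_params mparams = llama_model_default_params();
        mparams.vocab_only = true; // tokenizer check only, skip loading weights

        // placeholder path to a Phi-4 model converted with tokenizer_pre == "gpt-4o"
        llama_model * model = llama_model_load_from_file("phi-4.gguf", mparams);
        if (model == NULL) {
            fprintf(stderr, "failed to load model\n");
            return 1;
        }

        const llama_vocab * vocab = llama_model_get_vocab(model);

        const char * text = "Hello, Phi-4!";
        std::vector<llama_token> tokens(64);
        const int n = llama_tokenize(vocab, text, (int) strlen(text),
                                     tokens.data(), (int) tokens.size(),
                                     /*add_special=*/true, /*parse_special=*/false);

        // With LLAMA_VOCAB_PRE_TYPE_GPT4O wired up, the split follows the new
        // regex instead of the default BPE pre-tokenizer (and no warning about
        // a missing pre-tokenizer type should be logged).
        printf("n_tokens = %d\n", n);
        for (int i = 0; i < n; ++i) {
            printf("%d ", tokens[i]);
        }
        printf("\n");

        llama_model_free(model);
        llama_backend_free();
        return 0;
    }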