Skip to content

Commit 9241b0f

Browse files
committed
Sync ggml: add Q1_0 1-bit quantization support (CPU) (#21273)
1 parent 6ebab28 commit 9241b0f

2 files changed

Lines changed: 8 additions & 2 deletions

File tree

llama_cpp/_ggml.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -121,7 +121,8 @@ class GGMLStatus(enum.IntEnum):
121 121   # // GGML_TYPE_IQ4_NL_8_8 = 38,
122 122   # GGML_TYPE_MXFP4 = 39, // MXFP4 (1 block)
123 123   # GGML_TYPE_NVFP4 = 40, // NVFP4 (4 blocks, E4M3 scale)
124     - # GGML_TYPE_COUNT = 41,
    124 + # GGML_TYPE_Q1_0 = 41,
    125 + # GGML_TYPE_COUNT = 42,
125 126   # };
126 127   class GGMLType(enum.IntEnum):
127 128   GGML_TYPE_F32 = 0
@@ -157,7 +158,8 @@ class GGMLType(enum.IntEnum):
157 158   GGML_TYPE_TQ2_0 = 35
158 159   GGML_TYPE_MXFP4 = 39
159 160   GGML_TYPE_NVFP4 = 40
160     - GGML_TYPE_COUNT = 41
    161 + GGML_TYPE_Q1_0 = 41
    162 + GGML_TYPE_COUNT = 42
161 163
162 164
163 165   # // precision
@@ -198,6 +200,7 @@ class GGMLPrec(enum.IntEnum):
198 200   # GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
199 201   # GGML_FTYPE_MOSTLY_MXFP4 = 25, // except 1d tensors
200 202   # GGML_FTYPE_MOSTLY_NVFP4 = 26, // except 1d tensors
    203 + # GGML_FTYPE_MOSTLY_Q1_0 = 27, // except 1d tensors
201 204   # };
202 205   class GGMLFType(enum.IntEnum):
203 206   GGML_FTYPE_UNKNOWN = -1
@@ -226,6 +229,7 @@ class GGMLFType(enum.IntEnum):
226 229   GGML_FTYPE_MOSTLY_BF16 = 24
227 230   GGML_FTYPE_MOSTLY_MXFP4 = 25
228 231   GGML_FTYPE_MOSTLY_NVFP4 = 26
    232 + GGML_FTYPE_MOSTLY_Q1_0 = 27
229 233
230 234
231 235   # // available tensor operations:

llama_cpp/llama_cpp.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -351,6 +351,7 @@
351 351   # LLAMA_FTYPE_MOSTLY_TQ2_0 = 37, // except 1d tensors
352 352   # LLAMA_FTYPE_MOSTLY_MXFP4_MOE = 38, // except 1d tensors
353 353   # LLAMA_FTYPE_MOSTLY_NVFP4 = 39, // except 1d tensors
    354 + # LLAMA_FTYPE_MOSTLY_Q1_0 = 40, // except 1d tensors
354 355   #
355 356   # LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
356 357   # };
@@ -391,6 +392,7 @@
391 392   LLAMA_FTYPE_MOSTLY_TQ2_0 = 37
392 393   LLAMA_FTYPE_MOSTLY_MXFP4_MOE = 38
393 394   LLAMA_FTYPE_MOSTLY_NVFP4 = 39
    395 + LLAMA_FTYPE_MOSTLY_Q1_0 = 40
394 396   LLAMA_FTYPE_GUESSED = 1024
395 397
396 398   # enum llama_rope_scaling_type {

0 commit comments

Comments
 (0)