//! Support for the [GGUF file format](https://github.com/philpax/ggml/blob/gguf-spec/docs/gguf.md). //! use super::{GgmlDType, QTensor}; use crate::{Device, Result}; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use std::collections::HashMap; pub const DEFAULT_ALIGNMENT: u64 = 32; #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum Magic { Gguf, } impl TryFrom for Magic { type Error = crate::Error; fn try_from(value: u32) -> Result { let magic = match value { 0x46554747 | 0x47475546 => Self::Gguf, _ => crate::bail!("unknown magic 0x{value:08x}"), }; Ok(magic) } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum VersionedMagic { GgufV1, GgufV2, GgufV3, } impl VersionedMagic { fn read(reader: &mut R) -> Result { let magic = reader.read_u32::()?; let magic = Magic::try_from(magic)?; let version = reader.read_u32::()?; let versioned_magic = match (magic, version) { (Magic::Gguf, 1) => Self::GgufV1, (Magic::Gguf, 2) => Self::GgufV2, (Magic::Gguf, 3) => Self::GgufV3, _ => crate::bail!("gguf: unsupported magic/version {magic:?}/{version}"), }; Ok(versioned_magic) } } #[derive(Debug)] pub struct TensorInfo { pub ggml_dtype: GgmlDType, pub shape: crate::Shape, pub offset: u64, } impl TensorInfo { pub fn read( &self, reader: &mut R, tensor_data_offset: u64, device: &Device, ) -> Result { let tensor_elems = self.shape.elem_count(); let block_size = self.ggml_dtype.block_size(); if tensor_elems % block_size != 0 { crate::bail!( "the number of elements {tensor_elems} is not divisible by the block size {block_size}" ) } let size_in_bytes = tensor_elems / block_size * self.ggml_dtype.type_size(); let mut raw_data = vec![0u8; size_in_bytes]; reader.seek(std::io::SeekFrom::Start(tensor_data_offset + self.offset))?; reader.read_exact(&mut raw_data)?; super::ggml_file::qtensor_from_ggml( self.ggml_dtype, &raw_data, self.shape.dims().to_vec(), device, ) } } #[derive(Debug)] pub struct Content { pub magic: VersionedMagic, pub metadata: HashMap, pub tensor_infos: HashMap, pub tensor_data_offset: u64, } fn read_string(reader: &mut R, magic: &VersionedMagic) -> Result { let len = match magic { VersionedMagic::GgufV1 => reader.read_u32::()? as usize, VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => { reader.read_u64::()? as usize } }; let mut v = vec![0u8; len]; reader.read_exact(&mut v)?; // GGUF strings are supposed to be non-null terminated but in practice this happens. while let Some(0) = v.last() { v.pop(); } // GGUF strings are utf8 encoded but there are cases that don't seem to be valid. Ok(String::from_utf8_lossy(&v).into_owned()) } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum ValueType { // The value is a 8-bit unsigned integer. U8, // The value is a 8-bit signed integer. I8, // The value is a 16-bit unsigned little-endian integer. U16, // The value is a 16-bit signed little-endian integer. I16, // The value is a 32-bit unsigned little-endian integer. U32, // The value is a 32-bit signed little-endian integer. I32, // The value is a 64-bit unsigned little-endian integer. U64, // The value is a 64-bit signed little-endian integer. I64, // The value is a 32-bit IEEE754 floating point number. F32, // The value is a 64-bit IEEE754 floating point number. F64, // The value is a boolean. // 1-byte value where 0 is false and 1 is true. // Anything else is invalid, and should be treated as either the model being invalid or the reader being buggy. Bool, // The value is a UTF-8 non-null-terminated string, with length prepended. String, // The value is an array of other values, with the length and type prepended. // Arrays can be nested, and the length of the array is the number of elements in the array, not the number of bytes. Array, } #[derive(Debug, Clone)] pub enum Value { U8(u8), I8(i8), U16(u16), I16(i16), U32(u32), I32(i32), U64(u64), I64(i64), F32(f32), F64(f64), Bool(bool), String(String), Array(Vec), } impl Value { pub fn value_type(&self) -> ValueType { match self { Self::U8(_) => ValueType::U8, Self::I8(_) => ValueType::I8, Self::U16(_) => ValueType::U16, Self::I16(_) => ValueType::I16, Self::U32(_) => ValueType::U32, Self::I32(_) => ValueType::I32, Self::U64(_) => ValueType::U64, Self::I64(_) => ValueType::I64, Self::F32(_) => ValueType::F32, Self::F64(_) => ValueType::F64, Self::Bool(_) => ValueType::Bool, Self::String(_) => ValueType::String, Self::Array(_) => ValueType::Array, } } pub fn to_u8(&self) -> Result { match self { Self::U8(v) => Ok(*v), v => crate::bail!("not a u8 {v:?}"), } } pub fn to_i8(&self) -> Result { match self { Self::I8(v) => Ok(*v), v => crate::bail!("not a i8 {v:?}"), } } pub fn to_u16(&self) -> Result { match self { Self::U16(v) => Ok(*v), v => crate::bail!("not a u16 {v:?}"), } } pub fn to_i16(&self) -> Result { match self { Self::I16(v) => Ok(*v), v => crate::bail!("not a i16 {v:?}"), } } pub fn to_u32(&self) -> Result { match self { Self::U32(v) => Ok(*v), v => crate::bail!("not a u32 {v:?}"), } } pub fn to_i32(&self) -> Result { match self { Self::I32(v) => Ok(*v), v => crate::bail!("not a i32 {v:?}"), } } /// This will also automatically upcast any integral types which will not truncate. pub fn to_u64(&self) -> Result { match self { Self::U64(v) => Ok(*v), // Autoupcast cases here Self::U8(v) => Ok(*v as u64), Self::U16(v) => Ok(*v as u64), Self::U32(v) => Ok(*v as u64), Self::Bool(v) => Ok(*v as u64), v => crate::bail!("not a u64 or upcastable to u64 {v:?}"), } } pub fn to_i64(&self) -> Result { match self { Self::I64(v) => Ok(*v), v => crate::bail!("not a i64 {v:?}"), } } pub fn to_f32(&self) -> Result { match self { Self::F32(v) => Ok(*v), v => crate::bail!("not a f32 {v:?}"), } } pub fn to_f64(&self) -> Result { match self { Self::F64(v) => Ok(*v), v => crate::bail!("not a f64 {v:?}"), } } pub fn to_bool(&self) -> Result { match self { Self::Bool(v) => Ok(*v), v => crate::bail!("not a bool {v:?}"), } } pub fn to_vec(&self) -> Result<&Vec> { match self { Self::Array(v) => Ok(v), v => crate::bail!("not a vec {v:?}"), } } pub fn to_string(&self) -> Result<&String> { match self { Self::String(v) => Ok(v), v => crate::bail!("not a string {v:?}"), } } fn read( reader: &mut R, value_type: ValueType, magic: &VersionedMagic, ) -> Result { let v = match value_type { ValueType::U8 => Self::U8(reader.read_u8()?), ValueType::I8 => Self::I8(reader.read_i8()?), ValueType::U16 => Self::U16(reader.read_u16::()?), ValueType::I16 => Self::I16(reader.read_i16::()?), ValueType::U32 => Self::U32(reader.read_u32::()?), ValueType::I32 => Self::I32(reader.read_i32::()?), ValueType::U64 => Self::U64(reader.read_u64::()?), ValueType::I64 => Self::I64(reader.read_i64::()?), ValueType::F32 => Self::F32(reader.read_f32::()?), ValueType::F64 => Self::F64(reader.read_f64::()?), ValueType::Bool => match reader.read_u8()? { 0 => Self::Bool(false), 1 => Self::Bool(true), b => crate::bail!("unexpected bool value {b}"), }, ValueType::String => Self::String(read_string(reader, magic)?), ValueType::Array => { let value_type = reader.read_u32::()?; let value_type = ValueType::from_u32(value_type)?; let len = match magic { VersionedMagic::GgufV1 => reader.read_u32::()? as usize, VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => { reader.read_u64::()? as usize } }; let mut vs = Vec::with_capacity(len); for _ in 0..len { vs.push(Value::read(reader, value_type, magic)?) } Self::Array(vs) } }; Ok(v) } fn write(&self, w: &mut W) -> Result<()> { match self { &Self::U8(v) => w.write_u8(v)?, &Self::I8(v) => w.write_i8(v)?, &Self::U16(v) => w.write_u16::(v)?, &Self::I16(v) => w.write_i16::(v)?, &Self::U32(v) => w.write_u32::(v)?, &Self::I32(v) => w.write_i32::(v)?, &Self::U64(v) => w.write_u64::(v)?, &Self::I64(v) => w.write_i64::(v)?, &Self::F32(v) => w.write_f32::(v)?, &Self::F64(v) => w.write_f64::(v)?, &Self::Bool(v) => w.write_u8(u8::from(v))?, Self::String(v) => write_string(w, v.as_str())?, Self::Array(v) => { // The `Value` type does not enforce that all the values in an Array have the same // type. let value_type = if v.is_empty() { // Doesn't matter, the array is empty. ValueType::U32 } else { let value_type: std::collections::HashSet<_> = v.iter().map(|elem| elem.value_type()).collect(); if value_type.len() != 1 { crate::bail!("multiple value-types in the same array {value_type:?}") } value_type.into_iter().next().unwrap() }; w.write_u32::(value_type.to_u32())?; w.write_u64::(v.len() as u64)?; for elem in v.iter() { elem.write(w)? } } } Ok(()) } } impl ValueType { fn from_u32(v: u32) -> Result { let v = match v { 0 => Self::U8, 1 => Self::I8, 2 => Self::U16, 3 => Self::I16, 4 => Self::U32, 5 => Self::I32, 6 => Self::F32, 7 => Self::Bool, 8 => Self::String, 9 => Self::Array, 10 => Self::U64, 11 => Self::I64, 12 => Self::F64, v => crate::bail!("unrecognized value-type {v:#08x}"), }; Ok(v) } fn to_u32(self) -> u32 { match self { Self::U8 => 0, Self::I8 => 1, Self::U16 => 2, Self::I16 => 3, Self::U32 => 4, Self::I32 => 5, Self::F32 => 6, Self::Bool => 7, Self::String => 8, Self::Array => 9, Self::U64 => 10, Self::I64 => 11, Self::F64 => 12, } } } impl Content { pub fn read(reader: &mut R) -> Result { let magic = VersionedMagic::read(reader)?; let tensor_count = match magic { VersionedMagic::GgufV1 => reader.read_u32::()? as usize, VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => { reader.read_u64::()? as usize } }; let metadata_kv_count = match magic { VersionedMagic::GgufV1 => reader.read_u32::()? as usize, VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => { reader.read_u64::()? as usize } }; let mut metadata = HashMap::new(); for _idx in 0..metadata_kv_count { let key = read_string(reader, &magic)?; let value_type = reader.read_u32::()?; let value_type = ValueType::from_u32(value_type)?; let value = Value::read(reader, value_type, &magic)?; metadata.insert(key, value); } let mut tensor_infos = HashMap::new(); for _idx in 0..tensor_count { let tensor_name = read_string(reader, &magic)?; let n_dimensions = reader.read_u32::()?; let mut dimensions: Vec = match magic { VersionedMagic::GgufV1 => { let mut dimensions = vec![0; n_dimensions as usize]; reader.read_u32_into::(&mut dimensions)?; dimensions.into_iter().map(|c| c as usize).collect() } VersionedMagic::GgufV2 | VersionedMagic::GgufV3 => { let mut dimensions = vec![0; n_dimensions as usize]; reader.read_u64_into::(&mut dimensions)?; dimensions.into_iter().map(|c| c as usize).collect() } }; dimensions.reverse(); let ggml_dtype = reader.read_u32::()?; let ggml_dtype = GgmlDType::from_u32(ggml_dtype)?; let offset = reader.read_u64::()?; tensor_infos.insert( tensor_name, TensorInfo { shape: crate::Shape::from(dimensions), offset, ggml_dtype, }, ); } let position = reader.stream_position()?; let alignment = match metadata.get("general.alignment") { Some(Value::U8(v)) => *v as u64, Some(Value::U16(v)) => *v as u64, Some(Value::U32(v)) => *v as u64, Some(Value::I8(v)) if *v >= 0 => *v as u64, Some(Value::I16(v)) if *v >= 0 => *v as u64, Some(Value::I32(v)) if *v >= 0 => *v as u64, _ => DEFAULT_ALIGNMENT, }; let tensor_data_offset = position.div_ceil(alignment) * alignment; Ok(Self { magic, metadata, tensor_infos, tensor_data_offset, }) } pub fn tensor( &self, reader: &mut R, name: &str, device: &Device, ) -> Result { let tensor_info = match self.tensor_infos.get(name) { Some(tensor_info) => tensor_info, None => crate::bail!("cannot find tensor info for {name}"), }; tensor_info.read(reader, self.tensor_data_offset, device) } } fn write_string(w: &mut W, str: &str) -> Result<()> { let bytes = str.as_bytes(); w.write_u64::(bytes.len() as u64)?; w.write_all(bytes)?; Ok(()) } pub fn write( w: &mut W, metadata: &[(&str, &Value)], tensors: &[(&str, &QTensor)], ) -> Result<()> { w.write_u32::(0x46554747)?; w.write_u32::(2)?; // version 2. w.write_u64::(tensors.len() as u64)?; w.write_u64::(metadata.len() as u64)?; for (name, value) in metadata.iter() { write_string(w, name)?; w.write_u32::(value.value_type().to_u32())?; value.write(w)?; } let mut offset = 0usize; let mut offsets = Vec::with_capacity(tensors.len()); for (name, tensor) in tensors.iter() { write_string(w, name)?; let dims = tensor.shape().dims(); w.write_u32::(dims.len() as u32)?; for &dim in dims.iter().rev() { w.write_u64::(dim as u64)?; } w.write_u32::(tensor.dtype().to_u32())?; w.write_u64::(offset as u64)?; offsets.push(offset); let size_in_bytes = tensor.storage_size_in_bytes(); let padding = 31 - (31 + size_in_bytes) % 32; offset += size_in_bytes + padding; } let pos = w.stream_position()? as usize; let padding = 31 - (31 + pos) % 32; w.write_all(&vec![0u8; padding])?; let tensor_start_pos = w.stream_position()? as usize; for (offset, (_name, tensor)) in offsets.iter().zip(tensors.iter()) { let pos = w.stream_position()? as usize; if tensor_start_pos + offset != pos { crate::bail!( "internal error, unexpected current position {tensor_start_pos} {offset} {pos}" ) } let data = tensor.data()?; let size_in_bytes = data.len(); w.write_all(&data)?; let padding = 31 - (31 + size_in_bytes) % 32; w.write_all(&vec![0u8; padding])?; } Ok(()) }