diff options
author | Laurent Mazare <laurent.mazare@gmail.com> | 2023-10-18 16:29:38 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-18 16:29:38 +0100 |
commit | 86e7d539d253740d5a0579e6f53acc12e30d3e4c (patch) | |
tree | 42186de2fb48a03d1473486b3f7a643704ea624a /candle-transformers/src/quantized_nn.rs | |
parent | cb034506cdbf6f650038893762ac815669ddbb10 (diff) | |
download | candle-86e7d539d253740d5a0579e6f53acc12e30d3e4c.tar.gz candle-86e7d539d253740d5a0579e6f53acc12e30d3e4c.tar.bz2 candle-86e7d539d253740d5a0579e6f53acc12e30d3e4c.zip |
Add the quantized mpt model. (#1123)
* Add the quantized mpt model.
* Support the quantized model for replit-code.
Diffstat (limited to 'candle-transformers/src/quantized_nn.rs')
-rw-r--r-- | candle-transformers/src/quantized_nn.rs | 5 |
1 file changed, 5 insertions, 0 deletions
diff --git a/candle-transformers/src/quantized_nn.rs b/candle-transformers/src/quantized_nn.rs index 1745327d..d71c3b60 100644 --- a/candle-transformers/src/quantized_nn.rs +++ b/candle-transformers/src/quantized_nn.rs @@ -59,6 +59,11 @@ pub fn layer_norm(size: usize, eps: f64, vb: VarBuilder) -> Result<candle_nn::La Ok(candle_nn::LayerNorm::new(weight, bias, eps)) } +pub fn layer_norm_no_bias(size: usize, eps: f64, vb: VarBuilder) -> Result<candle_nn::LayerNorm> { + let weight = vb.get(size, "weight")?.dequantize(vb.device())?; + Ok(candle_nn::LayerNorm::new_no_bias(weight, eps)) +} + pub fn linear_no_bias(in_dim: usize, out_dim: usize, vb: VarBuilder) -> Result<Linear> { let weight = QMatMul::new(in_dim, out_dim, vb)?; Ok(Linear { weight, bias: None }) |