From 82464166e4d947a717509922a566e7ceaf4b3f2f Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Fri, 28 Jul 2023 12:07:39 +0200
Subject: 3rd phase.

---
 candle-book/src/SUMMARY.md                 | 10 ++--
 candle-book/src/cuda/README.md             |  1 +
 candle-book/src/cuda/porting.md            |  1 +
 candle-book/src/cuda/writing.md            |  1 +
 candle-book/src/error_manage.md            | 38 ++++++++++++++
 candle-book/src/inference/README.md        |  6 +++
 candle-book/src/inference/hub.md           | 79 ++++++++++++++++++++++++++++++
 candle-book/src/inference/serialization.md |  2 +
 candle-book/src/training/serialization.md  |  1 +
 9 files changed, 134 insertions(+), 5 deletions(-)
 create mode 100644 candle-book/src/cuda/README.md
 create mode 100644 candle-book/src/cuda/porting.md
 create mode 100644 candle-book/src/cuda/writing.md
 create mode 100644 candle-book/src/training/serialization.md
(limited to 'candle-book')

diff --git a/candle-book/src/SUMMARY.md b/candle-book/src/SUMMARY.md
index ddd6e916..e35a865f 100644
--- a/candle-book/src/SUMMARY.md
+++ b/candle-book/src/SUMMARY.md
@@ -12,11 +12,11 @@
 
 - [Running a model](inference/README.md)
   - [Using the hub](inference/hub.md)
-  - [Serialization](inference/serialization.md)
-  - [Advanced Cuda usage](inference/cuda/README.md)
-    - [Writing a custom kernel](inference/cuda/writing.md)
-    - [Porting a custom kernel](inference/cuda/porting.md)
 - [Error management](error_manage.md)
+- [Advanced Cuda usage](cuda/README.md)
+  - [Writing a custom kernel](cuda/writing.md)
+  - [Porting a custom kernel](cuda/porting.md)
+- [Using MKL](advanced/mkl.md)
 - [Creating apps](apps/README.md)
   - [Creating a WASM app](apps/wasm.md)
   - [Creating a REST api webserver](apps/rest.md)
@@ -24,4 +24,4 @@
 - [Training](training/README.md)
   - [MNIST](training/mnist.md)
   - [Fine-tuning](training/finetuning.md)
-- [Using MKL](advanced/mkl.md)
+  - [Serialization](training/serialization.md)
diff --git a/candle-book/src/cuda/README.md b/candle-book/src/cuda/README.md
new file mode 100644
index 00000000..68434cbf
--- /dev/null
+++ b/candle-book/src/cuda/README.md
@@ -0,0 +1 @@
+# Advanced Cuda usage
diff --git a/candle-book/src/cuda/porting.md b/candle-book/src/cuda/porting.md
new file mode 100644
index 00000000..e332146d
--- /dev/null
+++ b/candle-book/src/cuda/porting.md
@@ -0,0 +1 @@
+# Porting a custom kernel
diff --git a/candle-book/src/cuda/writing.md b/candle-book/src/cuda/writing.md
new file mode 100644
index 00000000..0fe1f3dc
--- /dev/null
+++ b/candle-book/src/cuda/writing.md
@@ -0,0 +1 @@
+# Writing a custom kernel
diff --git a/candle-book/src/error_manage.md b/candle-book/src/error_manage.md
index 042e191f..af7593d6 100644
--- a/candle-book/src/error_manage.md
+++ b/candle-book/src/error_manage.md
@@ -1 +1,39 @@
 # Error management
+
+You might have seen in the code base a lot of `.unwrap()` or `?`.
+If you're unfamiliar with Rust, check out the [Rust book](https://doc.rust-lang.org/book/ch09-02-recoverable-errors-with-result.html)
+for more information.
+
+What's important to know, though, is that if you want to know *where* a particular operation failed,
+you can simply use `RUST_BACKTRACE=1` to get the location where the model actually failed.
+
+Let's look at some failing code:
+
+```rust,ignore
+let x = Tensor::zeros((1, 784), DType::F32, &device)?;
+let y = Tensor::zeros((1, 784), DType::F32, &device)?;
+let z = x.matmul(&y)?;
+```
+
+This will print at runtime:
+
+```bash
+Error: ShapeMismatchBinaryOp { lhs: [1, 784], rhs: [1, 784], op: "matmul" }
+```
+
+
+After adding `RUST_BACKTRACE=1`:
+
+
+```bash
+Error: WithBacktrace { inner: ShapeMismatchBinaryOp { lhs: [1, 784], rhs: [1, 784], op: "matmul" }, backtrace: Backtrace [{ fn: "candle::error::Error::bt", file: "/home/nicolas/.cargo/git/checkouts/candle-5bb8ef7e0626d693/f291065/candle-core/src/error.rs", line: 200 }, { fn: "candle::tensor::Tensor::matmul", file: "/home/nicolas/.cargo/git/checkouts/candle-5bb8ef7e0626d693/f291065/candle-core/src/tensor.rs", line: 816 }, { fn: "myapp::main", file: "./src/main.rs", line: 29 }, { fn: "core::ops::function::FnOnce::call_once", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/core/src/ops/function.rs", line: 250 }, { fn: "std::sys_common::backtrace::__rust_begin_short_backtrace", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/sys_common/backtrace.rs", line: 135 }, { fn: "std::rt::lang_start::{{closure}}", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/rt.rs", line: 166 }, { fn: "core::ops::function::impls::<impl core::ops::function::FnOnce<A> for &F>::call_once", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/core/src/ops/function.rs", line: 284 }, { fn: "std::panicking::try::do_call", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs", line: 500 }, { fn: "std::panicking::try", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs", line: 464 }, { fn: "std::panic::catch_unwind", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panic.rs", line: 142 }, { fn: "std::rt::lang_start_internal::{{closure}}", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/rt.rs", line: 148 }, { fn: "std::panicking::try::do_call", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs", line: 500 }, { fn: "std::panicking::try", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs", line: 464 }, { fn: "std::panic::catch_unwind", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panic.rs", line: 142 }, { fn: "std::rt::lang_start_internal", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/rt.rs", line: 148 }, { fn: "std::rt::lang_start", file: "/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/rt.rs", line: 165 }, { fn: "main" }, { fn: "__libc_start_main" }, { fn: "_start" }] }
+```
+
+Not super pretty at the moment, but we can see the error occurred at `{ fn: "myapp::main", file: "./src/main.rs", line: 29 }`
+
+
+Another thing to note is that, since Rust is compiled, it is not necessarily as easy to recover proper stacktraces,
+especially in release builds. We're using [`anyhow`](https://docs.rs/anyhow/latest/anyhow/) for that.
+The library is still young, please [report](https://github.com/LaurentMazare/candle/issues) any issues detecting where an error is coming from.
+
+
diff --git a/candle-book/src/inference/README.md b/candle-book/src/inference/README.md
index c82f85e1..1b75a310 100644
--- a/candle-book/src/inference/README.md
+++ b/candle-book/src/inference/README.md
@@ -1 +1,7 @@
 # Running a model
+
+
+In order to run an existing model, you will need to download and use existing weights.
+Most models are already available on https://huggingface.co/ in [`safetensors`](https://github.com/huggingface/safetensors) format.
+
+Let's get started by running an old model: `bert-base-uncased`.
diff --git a/candle-book/src/inference/hub.md b/candle-book/src/inference/hub.md
index 6242c070..8cf375d7 100644
--- a/candle-book/src/inference/hub.md
+++ b/candle-book/src/inference/hub.md
@@ -1 +1,80 @@
 # Using the hub
+
+Install the [`hf-hub`](https://github.com/huggingface/hf-hub) crate:
+
+```bash
+cargo add hf-hub
+```
+
+Then let's start by downloading the [model file](https://huggingface.co/bert-base-uncased/tree/main).
+
+
+```rust
+# extern crate candle;
+# extern crate hf_hub;
+use hf_hub::api::sync::Api;
+use candle::Device;
+
+let api = Api::new().unwrap();
+let repo = api.model("bert-base-uncased".to_string());
+
+let weights = repo.get("model.safetensors").unwrap();
+
+let weights = candle::safetensors::load(weights, &Device::Cpu);
+```
+
+We now have access to all the [tensors](https://huggingface.co/bert-base-uncased?show_tensors=true) within the file.
+
+
+## Using async
+
+`hf-hub` comes with an async API.
+
+```bash
+cargo add hf-hub --features tokio
+```
+
+```rust,ignore
+# extern crate candle;
+# extern crate hf_hub;
+use hf_hub::api::tokio::Api;
+use candle::Device;
+
+let api = Api::new().unwrap();
+let repo = api.model("bert-base-uncased".to_string());
+
+let weights = repo.get("model.safetensors").await.unwrap();
+
+let weights = candle::safetensors::load(weights, &Device::Cpu);
+```
+
+
+## Using in a real model.
+
+Now that we have our weights, we can use them in our bert architecture:
+
+```rust
+# extern crate candle;
+# extern crate candle_nn;
+# extern crate hf_hub;
+# use hf_hub::api::sync::Api;
+# use candle::Device;
+#
+# let api = Api::new().unwrap();
+# let repo = api.model("bert-base-uncased".to_string());
+#
+# let weights = repo.get("model.safetensors").unwrap();
+use candle_nn::Linear;
+
+let weights = candle::safetensors::load(weights, &Device::Cpu);
+
+let weight = weights.get("bert.encoder.layer.0.attention.self.query.weight").unwrap();
+let bias = weights.get("bert.encoder.layer.0.attention.self.query.bias").unwrap();
+
+let linear = Linear::new(weight, Some(bias));
+
+let input_ids = Tensor::zeros((3, 7680), DType::F32, &Device::Cpu).unwrap();
+let output = linear.forward(&input_ids);
+```
+
+For a full reference, you can check out the full [bert](https://github.com/LaurentMazare/candle/tree/main/candle-examples/examples/bert) example.
diff --git a/candle-book/src/inference/serialization.md b/candle-book/src/inference/serialization.md
index 0dfc62d3..133ff025 100644
--- a/candle-book/src/inference/serialization.md
+++ b/candle-book/src/inference/serialization.md
@@ -1 +1,3 @@
 # Serialization
+
+Once you have a r
diff --git a/candle-book/src/training/serialization.md b/candle-book/src/training/serialization.md
new file mode 100644
index 00000000..0dfc62d3
--- /dev/null
+++ b/candle-book/src/training/serialization.md
@@ -0,0 +1 @@
+# Serialization
-- cgit v1.2.3

From 45642a8530fdfbd64fcac118aed59b7cb7dfaf45 Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Tue, 1 Aug 2023 15:04:41 +0200
Subject: Fixing examples.

---
 candle-book/src/inference/hub.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'candle-book')

diff --git a/candle-book/src/inference/hub.md b/candle-book/src/inference/hub.md
index 8cf375d7..de514322 100644
--- a/candle-book/src/inference/hub.md
+++ b/candle-book/src/inference/hub.md
@@ -58,20 +58,20 @@ Now that we have our weights, we can use them in our bert architecture:
 # extern crate candle_nn;
 # extern crate hf_hub;
 # use hf_hub::api::sync::Api;
-# use candle::Device;
 #
 # let api = Api::new().unwrap();
 # let repo = api.model("bert-base-uncased".to_string());
 #
 # let weights = repo.get("model.safetensors").unwrap();
+use candle::{Device, Tensor, DType};
 use candle_nn::Linear;
 
-let weights = candle::safetensors::load(weights, &Device::Cpu);
+let weights = candle::safetensors::load(weights, &Device::Cpu).unwrap();
 
 let weight = weights.get("bert.encoder.layer.0.attention.self.query.weight").unwrap();
 let bias = weights.get("bert.encoder.layer.0.attention.self.query.bias").unwrap();
 
-let linear = Linear::new(weight, Some(bias));
+let linear = Linear::new(weight.clone(), Some(bias.clone()));
 
 let input_ids = Tensor::zeros((3, 7680), DType::F32, &Device::Cpu).unwrap();
 let output = linear.forward(&input_ids);
-- cgit v1.2.3

From a44471a305f2bc768c4f0dd0e7d23a7cfe3cb408 Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Tue, 1 Aug 2023 16:36:53 +0200
Subject: Adding more details on how to load things.

- Loading with memmap
- Loading a sharded tensor
- Moved some snippets to `candle-examples/src/lib.rs`. This is because
  managing book specific dependencies is a pain
  https://github.com/rust-lang/mdBook/issues/706
- This causes a non aligned inclusion
  https://github.com/rust-lang/mdBook/pull/1856 which we have to ignore
  fmt to remove.

mdbook might need some more love :)
---
 candle-book/src/inference/hub.md | 46 ++++++++++++++-----
 candle-core/src/safetensors.rs   |  6 ++-
 candle-examples/Cargo.toml       |  4 ++
 candle-examples/src/lib.rs       | 99 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 143 insertions(+), 12 deletions(-)
(limited to 'candle-book')

diff --git a/candle-book/src/inference/hub.md b/candle-book/src/inference/hub.md
index de514322..01492df1 100644
--- a/candle-book/src/inference/hub.md
+++ b/candle-book/src/inference/hub.md
@@ -25,6 +25,8 @@ let weights = candle::safetensors::load(weights, &Device::Cpu);
 
 We now have access to all the [tensors](https://huggingface.co/bert-base-uncased?show_tensors=true) within the file.
 
+You can check all the names of the tensors [here](https://huggingface.co/bert-base-uncased?show_tensors=true)
+
 
 ## Using async
 
@@ -35,17 +37,9 @@ cargo add hf-hub --features tokio
 ```
 
 ```rust,ignore
-# extern crate candle;
-# extern crate hf_hub;
-use hf_hub::api::tokio::Api;
-use candle::Device;
-
-let api = Api::new().unwrap();
-let repo = api.model("bert-base-uncased".to_string());
-
-let weights = repo.get("model.safetensors").await.unwrap();
-
-let weights = candle::safetensors::load(weights, &Device::Cpu);
+# This is tested directly in examples crate because it needs external dependencies unfortunately:
+# See [this](https://github.com/rust-lang/mdBook/issues/706)
+{{#include ../../../candle-examples/src/lib.rs:book_hub_1}}
 ```
 
 
@@ -78,3 +72,33 @@ let output = linear.forward(&input_ids);
 ```
 
 For a full reference, you can check out the full [bert](https://github.com/LaurentMazare/candle/tree/main/candle-examples/examples/bert) example.
+
+## Memory mapping
+
+For more efficient loading, instead of reading the file, you could use [`memmap2`](https://docs.rs/memmap2/latest/memmap2/).
+
+**Note**: Be careful with memory mapping: it seems to cause issues on [Windows, WSL](https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/5893)
+and it will definitely be slower on a network-mounted disk, because it will issue more read calls.
+
+```rust,ignore
+{{#include ../../../candle-examples/src/lib.rs:book_hub_2}}
+```
+
+**Note**: This operation is **unsafe**. [See the safety notice](https://docs.rs/memmap2/latest/memmap2/struct.Mmap.html#safety).
+In practice, model files should never be modified, and the mmaps should be mostly READONLY anyway, so the caveat most likely does not apply, but always keep it in mind.
+
+
+## Tensor Parallel Sharding
+
+When using multiple GPUs with tensor parallelism in order to get good latency, you can load only the part of the Tensor you need.
+
+For that, you need to use [`safetensors`](https://crates.io/crates/safetensors) directly.
+
+```bash
+cargo add safetensors
+```
+
+
+```rust,ignore
+{{#include ../../../candle-examples/src/lib.rs:book_hub_3}}
+```
diff --git a/candle-core/src/safetensors.rs b/candle-core/src/safetensors.rs
index 1880a041..132fb914 100644
--- a/candle-core/src/safetensors.rs
+++ b/candle-core/src/safetensors.rs
@@ -242,7 +242,11 @@ fn convert_back(tensor: &Tensor) -> Result<Vec<u8>> {
 
 pub fn load<P: AsRef<std::path::Path>>(filename: P, device: &Device) -> Result<HashMap<String, Tensor>> {
     let data = std::fs::read(filename.as_ref())?;
-    let st = safetensors::SafeTensors::deserialize(&data)?;
+    load_buffer(&data[..], device)
+}
+
+pub fn load_buffer(data: &[u8], device: &Device) -> Result<HashMap<String, Tensor>> {
+    let st = safetensors::SafeTensors::deserialize(data)?;
     st.tensors()
         .into_iter()
         .map(|(name, view)| Ok((name, view.load(device)?)))
diff --git a/candle-examples/Cargo.toml b/candle-examples/Cargo.toml
index 0db960ca..d4544ef7 100644
--- a/candle-examples/Cargo.toml
+++ b/candle-examples/Cargo.toml
@@ -25,6 +25,7 @@ half = { workspace = true, optional = true }
 [dev-dependencies]
 anyhow = { workspace = true }
 byteorder = { workspace = true }
+hf-hub = { workspace = true, features=["tokio"]}
 clap = { workspace = true }
 hf-hub = { workspace = true }
 memmap2 = { workspace = true }
@@ -34,6 +35,9 @@ tracing = { workspace = true }
 tracing-chrome = { workspace = true }
 tracing-subscriber = { workspace = true }
 wav = { workspace = true }
+# Necessary to disambiguate with tokio in wasm examples which are 1.28.1
+tokio = "1.29.1"
+memmap2.workspace = true
 
 [build-dependencies]
 anyhow = { workspace = true }
diff --git a/candle-examples/src/lib.rs b/candle-examples/src/lib.rs
index 285aee04..3410026e 100644
--- a/candle-examples/src/lib.rs
+++ b/candle-examples/src/lib.rs
@@ -11,3 +11,102 @@ pub fn device(cpu: bool) -> Result<Device> {
         Ok(device)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    // NOTE: Waiting on https://github.com/rust-lang/mdBook/pull/1856
+    #[rustfmt::skip]
+    #[tokio::test]
+    async fn book_hub_1() {
+// ANCHOR: book_hub_1
+use candle::Device;
+use hf_hub::api::tokio::Api;
+
+let api = Api::new().unwrap();
+let repo = api.model("bert-base-uncased".to_string());
+
+let weights_filename = repo.get("model.safetensors").await.unwrap();
+
+let weights = candle::safetensors::load(weights_filename, &Device::Cpu).unwrap();
+// ANCHOR_END: book_hub_1
+        assert_eq!(weights.len(), 206);
+    }
+
+    #[rustfmt::skip]
+    #[test]
+    fn book_hub_2() {
+// ANCHOR: book_hub_2
+use candle::Device;
+use hf_hub::api::sync::Api;
+use memmap2::Mmap;
+use std::fs;
+
+let api = Api::new().unwrap();
+let repo = api.model("bert-base-uncased".to_string());
+let weights_filename = repo.get("model.safetensors").unwrap();
+
+let file = fs::File::open(weights_filename).unwrap();
+let mmap = unsafe { Mmap::map(&file).unwrap() };
+let weights = candle::safetensors::load_buffer(&mmap[..], &Device::Cpu).unwrap();
+// ANCHOR_END: book_hub_2
+        assert_eq!(weights.len(), 206);
+    }
+
+    #[rustfmt::skip]
+    #[test]
+    fn book_hub_3() {
+// ANCHOR: book_hub_3
+use candle::{DType, Device, Tensor};
+use hf_hub::api::sync::Api;
+use memmap2::Mmap;
+use safetensors::slice::IndexOp;
+use safetensors::SafeTensors;
+use std::fs;
+
+let api = Api::new().unwrap();
+let repo = api.model("bert-base-uncased".to_string());
+let weights_filename = repo.get("model.safetensors").unwrap();
+
+let file = fs::File::open(weights_filename).unwrap();
+let mmap = unsafe { Mmap::map(&file).unwrap() };
+
+// Use safetensors directly
+let tensors = SafeTensors::deserialize(&mmap[..]).unwrap();
+let view = tensors
+.tensor("bert.encoder.layer.0.attention.self.query.weight")
+.unwrap();
+
+// We're going to load shard with rank 1, within a world_size of 4
+// We're going to split along dimension 0 doing VIEW[start..stop, :]
+let rank = 1;
+let world_size = 4;
+let dim = 0;
+let dtype = view.dtype();
+let mut tp_shape = view.shape().to_vec();
+let size = tp_shape[0];
+
+if size % world_size != 0 {
+panic!("The dimension is not divisble by `world_size`");
+}
+let block_size = size / world_size;
+let start = rank * block_size;
+let stop = (rank + 1) * block_size;
+
+// Everything is expressed in tensor dimension
+// bytes offsets is handled automatically for safetensors.
+
+let iterator = view.slice(start..stop).unwrap();
+
+tp_shape[dim] = block_size;
+
+// Convert safetensors Dtype to candle DType
+let dtype: DType = dtype.try_into().unwrap();
+
+// TODO: Implement from_buffer_iterator to we can skip the extra CPU alloc.
+let raw: Vec<u8> = iterator.into_iter().flatten().cloned().collect();
+let tp_tensor = Tensor::from_raw_buffer(&raw, dtype, &tp_shape, &Device::Cpu).unwrap();
+// ANCHOR_END: book_hub_3
+        assert_eq!(view.shape(), &[768, 768]);
+        assert_eq!(tp_tensor.dims(), &[192, 768]);
+    }
+}
-- cgit v1.2.3

From a70b95f9e7f7e5aa66e647b51cb2849228077a47 Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Tue, 1 Aug 2023 16:49:35 +0200
Subject: Marking unwritten chapters as Draft (disables the link).

---
 candle-book/src/SUMMARY.md | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)
(limited to 'candle-book')

diff --git a/candle-book/src/SUMMARY.md b/candle-book/src/SUMMARY.md
index e35a865f..3432f66f 100644
--- a/candle-book/src/SUMMARY.md
+++ b/candle-book/src/SUMMARY.md
@@ -12,16 +12,16 @@
 
 - [Running a model](inference/README.md)
   - [Using the hub](inference/hub.md)
-- [Error management](error_manage.md)
-- [Advanced Cuda usage](cuda/README.md)
-  - [Writing a custom kernel](cuda/writing.md)
-  - [Porting a custom kernel](cuda/porting.md)
-- [Using MKL](advanced/mkl.md)
-- [Creating apps](apps/README.md)
-  - [Creating a WASM app](apps/wasm.md)
-  - [Creating a REST api webserver](apps/rest.md)
-  - [Creating a desktop Tauri app](apps/dekstop.md)
-- [Training](training/README.md)
-  - [MNIST](training/mnist.md)
-  - [Fine-tuning](training/finetuning.md)
-  - [Serialization](training/serialization.md)
+- [Error management]()
+- [Advanced Cuda usage]()
+  - [Writing a custom kernel]()
+  - [Porting a custom kernel]()
+- [Using MKL]()
+- [Creating apps]()
+  - [Creating a WASM app]()
+  - [Creating a REST api webserver]()
+  - [Creating a desktop Tauri app]()
+- [Training]()
+  - [MNIST]()
+  - [Fine-tuning]()
+  - [Serialization]()
-- cgit v1.2.3

From ae68635af9dfcae359f621dd3e1df3b3c3d97042 Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Wed, 2 Aug 2023 18:16:50 +0200
Subject: Add small error management.

---
 candle-book/src/error_manage.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)
(limited to 'candle-book')

diff --git a/candle-book/src/error_manage.md b/candle-book/src/error_manage.md
index af7593d6..c1a16bd9 100644
--- a/candle-book/src/error_manage.md
+++ b/candle-book/src/error_manage.md
@@ -36,4 +36,16 @@ Another thing to note is that, since Rust is compiled, it is not necessarily as
 especially in release builds. We're using [`anyhow`](https://docs.rs/anyhow/latest/anyhow/) for that.
 The library is still young, please [report](https://github.com/LaurentMazare/candle/issues) any issues detecting where an error is coming from.
 
 
+## Cuda error management
+
+When running a model on Cuda, you might get a stacktrace that does not really represent the error.
+The reason is that CUDA is async by nature, and therefore the error might surface while you are launching totally different kernels.
+
+One way to avoid this is to set `CUDA_LAUNCH_BLOCKING=1` as an environment variable. This will force every kernel to be launched sequentially.
+You might still, however, see the error reported for other kernels, as the faulty kernel can exit without an error while corrupting some pointer, and the error will only show up when the corresponding `CudaSlice` is dropped.
+
+
+If this occurs, you can use [`compute-sanitizer`](https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html).
+This tool is like `valgrind` but for CUDA. It will help locate the errors in the kernels.
+
-- cgit v1.2.3

From 166f4d1101437eb36c938781ed0b9270d9a1c282 Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Wed, 2 Aug 2023 18:35:31 +0200
Subject: `s/candle/candle_core/g`

---
 candle-book/src/inference/hub.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)
(limited to 'candle-book')

diff --git a/candle-book/src/inference/hub.md b/candle-book/src/inference/hub.md
index 01492df1..a974a1fa 100644
--- a/candle-book/src/inference/hub.md
+++ b/candle-book/src/inference/hub.md
@@ -10,17 +10,17 @@ Then let's start by downloading the [model file](https://huggingface.co/bert-bas
 
 
 ```rust
-# extern crate candle;
+# extern crate candle_core;
 # extern crate hf_hub;
 use hf_hub::api::sync::Api;
-use candle::Device;
+use candle_core::Device;
 
 let api = Api::new().unwrap();
 let repo = api.model("bert-base-uncased".to_string());
 
 let weights = repo.get("model.safetensors").unwrap();
 
-let weights = candle::safetensors::load(weights, &Device::Cpu);
+let weights = candle_core::safetensors::load(weights, &Device::Cpu);
 ```
 
 We now have access to all the [tensors](https://huggingface.co/bert-base-uncased?show_tensors=true) within the file.
@@ -48,7 +48,7 @@ cargo add hf-hub --features tokio
 Now that we have our weights, we can use them in our bert architecture:
 
 ```rust
-# extern crate candle;
+# extern crate candle_core;
 # extern crate candle_nn;
 # extern crate hf_hub;
 # use hf_hub::api::sync::Api;
@@ -57,10 +57,10 @@ Now that we have our weights, we can use them in our bert architecture:
 # let repo = api.model("bert-base-uncased".to_string());
 #
 # let weights = repo.get("model.safetensors").unwrap();
-use candle::{Device, Tensor, DType};
+use candle_core::{Device, Tensor, DType};
 use candle_nn::Linear;
 
-let weights = candle::safetensors::load(weights, &Device::Cpu).unwrap();
+let weights = candle_core::safetensors::load(weights, &Device::Cpu).unwrap();
 
 let weight = weights.get("bert.encoder.layer.0.attention.self.query.weight").unwrap();
 let bias = weights.get("bert.encoder.layer.0.attention.self.query.bias").unwrap();
-- cgit v1.2.3

From 1b2b32e58d13ac96cee42562b845fcecfd3a08de Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Wed, 2 Aug 2023 18:59:36 +0200
Subject: Remove dead page.t

---
 candle-book/src/inference/serialization.md | 3 ---
 1 file changed, 3 deletions(-)
 delete mode 100644 candle-book/src/inference/serialization.md
(limited to 'candle-book')

diff --git a/candle-book/src/inference/serialization.md b/candle-book/src/inference/serialization.md
deleted file mode 100644
index 133ff025..00000000
--- a/candle-book/src/inference/serialization.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Serialization
-
-Once you have a r
-- cgit v1.2.3

From dba31473d40c88fed22574ba96021dc59f25f3f7 Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Wed, 2 Aug 2023 19:18:43 +0200
Subject: Typos and format and CD only when PR lands.

---
 .github/workflows/book-cd.yml    | 2 --
 candle-book/src/inference/hub.md | 4 ++--
 candle-examples/src/lib.rs       | 8 ++++----
 3 files changed, 6 insertions(+), 8 deletions(-)
(limited to 'candle-book')

diff --git a/.github/workflows/book-cd.yml b/.github/workflows/book-cd.yml
index fc693a78..e8149e38 100644
--- a/.github/workflows/book-cd.yml
+++ b/.github/workflows/book-cd.yml
@@ -1,7 +1,5 @@
 name: Deploy Rust book
 on:
-  # TODO put this back only when merging after this PR lands.
-  pull_request:
   push:
     branches:
       - main
diff --git a/candle-book/src/inference/hub.md b/candle-book/src/inference/hub.md
index a974a1fa..b924b76d 100644
--- a/candle-book/src/inference/hub.md
+++ b/candle-book/src/inference/hub.md
@@ -67,8 +67,8 @@ let bias = weights.get("bert.encoder.layer.0.attention.self.query.bias").unwrap(
 
 let linear = Linear::new(weight.clone(), Some(bias.clone()));
 
-let input_ids = Tensor::zeros((3, 7680), DType::F32, &Device::Cpu).unwrap();
-let output = linear.forward(&input_ids);
+let input_ids = Tensor::zeros((3, 768), DType::F32, &Device::Cpu).unwrap();
+let output = linear.forward(&input_ids).unwrap();
 ```
 
 For a full reference, you can check out the full [bert](https://github.com/LaurentMazare/candle/tree/main/candle-examples/examples/bert) example.
diff --git a/candle-examples/src/lib.rs b/candle-examples/src/lib.rs
index 3410026e..2b6009b4 100644
--- a/candle-examples/src/lib.rs
+++ b/candle-examples/src/lib.rs
@@ -73,8 +73,8 @@ let mmap = unsafe { Mmap::map(&file).unwrap() };
 // Use safetensors directly
 let tensors = SafeTensors::deserialize(&mmap[..]).unwrap();
 let view = tensors
-.tensor("bert.encoder.layer.0.attention.self.query.weight")
-.unwrap();
+    .tensor("bert.encoder.layer.0.attention.self.query.weight")
+    .unwrap();
 
 // We're going to load shard with rank 1, within a world_size of 4
 // We're going to split along dimension 0 doing VIEW[start..stop, :]
 let rank = 1;
 let world_size = 4;
@@ -86,7 +86,7 @@ let mut tp_shape = view.shape().to_vec();
 let size = tp_shape[0];
 
 if size % world_size != 0 {
-panic!("The dimension is not divisble by `world_size`");
+    panic!("The dimension is not divisble by `world_size`");
 }
 let block_size = size / world_size;
 let start = rank * block_size;
@@ -102,7 +102,7 @@ tp_shape[dim] = block_size;
 // Convert safetensors Dtype to candle DType
 let dtype: DType = dtype.try_into().unwrap();
 
-// TODO: Implement from_buffer_iterator to we can skip the extra CPU alloc.
+// TODO: Implement from_buffer_iterator so we can skip the extra CPU alloc.
 let raw: Vec<u8> = iterator.into_iter().flatten().cloned().collect();
 let tp_tensor = Tensor::from_raw_buffer(&raw, dtype, &tp_shape, &Device::Cpu).unwrap();
 // ANCHOR_END: book_hub_3
-- cgit v1.2.3
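
The error-management content added in the patches above names three debugging switches: `RUST_BACKTRACE=1`, `CUDA_LAUNCH_BLOCKING=1`, and `compute-sanitizer`. As a quick, illustrative reference, a typical invocation could look like the sketch below; the `myapp` binary name and the `--release` flag are placeholders and are not part of the patches themselves.

```bash
# Show where a failing tensor operation originated (candle attaches a backtrace to the error):
RUST_BACKTRACE=1 cargo run --release

# On CUDA, launch kernels sequentially so the reported error points at the kernel that actually failed:
CUDA_LAUNCH_BLOCKING=1 cargo run --release

# Locate memory errors inside the kernels themselves; compute-sanitizer wraps the compiled binary
# (the path below is a placeholder for your own executable):
compute-sanitizer ./target/release/myapp
```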