summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- candle-datasets/Cargo.toml 5
-rw-r--r-- candle-datasets/src/hub.rs 25
-rw-r--r-- candle-datasets/src/lib.rs 1
-rw-r--r-- candle-examples/Cargo.toml 1
4 files changed, 3 insertions, 29 deletions
diff --git a/candle-datasets/Cargo.toml b/candle-datasets/Cargo.toml
index 32c8b3b1..f4472a08 100644
--- a/candle-datasets/Cargo.toml
+++ b/candle-datasets/Cargo.toml
@@ -9,9 +9,6 @@ categories.workspace = true
license.workspace = true
readme = "README.md"
-[lib]
-crate-type=["dylib"]
-
[dependencies]
byteorder = { workspace = true }
candle = { path = "../candle-core", version = "0.2.0", package = "candle-core" }
@@ -22,5 +19,5 @@ memmap2 = { workspace = true }
tokenizers = { workspace = true, features = ["onig"] }
rand = { workspace = true }
thiserror = { workspace = true }
-parquet = { version = "45.0.0"}
+parquet = { workspace = true}
image = { workspace = true }
diff --git a/candle-datasets/src/hub.rs b/candle-datasets/src/hub.rs
index ae40bc57..b135e148 100644
--- a/candle-datasets/src/hub.rs
+++ b/candle-datasets/src/hub.rs
@@ -57,6 +57,7 @@ pub fn from_hub(api: &Api, dataset_id: String) -> Result<Vec<SerializedFileReade
#[cfg(test)]
mod tests {
use super::*;
+ use parquet::file::reader::FileReader;
#[test]
fn test_dataset() {
@@ -67,28 +68,6 @@ mod tests {
)
.unwrap();
assert_eq!(files.len(), 1);
-
- let mut rows = files.into_iter().flat_map(|r| r.into_iter());
-
- let row = rows.next().unwrap().unwrap();
- let mut col_iter = row.get_column_iter();
-
- // First element is an image
- col_iter.next();
- assert_eq!(
- col_iter.next().unwrap().1,
- &parquet::record::Field::Str("a drawing of a green pokemon with red eyes".to_string())
- );
-
- // Keep for now to showcase how to use.
- for row in rows {
- if let Ok(row) = row {
- for (_idx, (_name, field)) in row.get_column_iter().enumerate() {
- if let parquet::record::Field::Str(value) = field {
- println!("Value {value:?}");
- }
- }
- }
- }
+ assert_eq!(files[0].metadata().file_metadata().num_rows(), 20);
}
}
diff --git a/candle-datasets/src/lib.rs b/candle-datasets/src/lib.rs
index cc4304c2..bfd77a99 100644
--- a/candle-datasets/src/lib.rs
+++ b/candle-datasets/src/lib.rs
@@ -1,6 +1,5 @@
//! Datasets & Dataloaders for Candle
pub mod batcher;
-#[cfg(feature = "hub")]
pub mod hub;
pub mod nlp;
pub mod vision;
diff --git a/candle-examples/Cargo.toml b/candle-examples/Cargo.toml
index cc1f1e4c..90165c19 100644
--- a/candle-examples/Cargo.toml
+++ b/candle-examples/Cargo.toml
@@ -41,7 +41,6 @@ tracing-subscriber = { workspace = true }
wav = { workspace = true }
# Necessary to disambiguate with tokio in wasm examples which are 1.28.1
tokio = "1.29.1"
-parquet = "45.0.0"
image.workspace = true
[build-dependencies]