From d1f876c302a3c5ab3d5d486362c2e39a6f31c616 Mon Sep 17 00:00:00 2001 From: Minoru Osuka Date: Wed, 4 Jun 2025 21:45:33 +0900 Subject: [PATCH] Add test --- Cargo.lock | 91 ++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 3 ++ src/extractor.rs | 38 ++++++++++++++++++++ 3 files changed, 132 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index a878421..4594d15 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -166,6 +166,34 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "errno" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi", +] + [[package]] name = "heck" version = "0.5.0" @@ -190,6 +218,12 @@ version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + [[package]] name = "litsea" version = "0.1.0" @@ -200,6 +234,7 @@ dependencies = [ "regex", "serde", "serde_json", + "tempfile", ] [[package]] @@ -220,6 +255,12 @@ dependencies = [ "libc", ] +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + [[package]] name = "once_cell_polyfill" version = "1.70.1" @@ -244,6 +285,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + [[package]] name = "rayon" version = "1.10.0" @@ -293,6 +340,19 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "rustix" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + [[package]] name = "ryu" version = "1.0.20" @@ -348,6 +408,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tempfile" +version = "3.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys", +] + [[package]] name = "unicode-ident" version = "1.0.18" @@ -360,6 +433,15 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "windows-sys" version = "0.59.0" @@ -432,3 +514,12 @@ name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] diff --git a/Cargo.toml b/Cargo.toml index ad4ee53..1083139 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,3 +21,6 @@ rayon = "1.10.0" regex = "1.10.5" serde = { version = "1.0.219", features = ["derive"] } serde_json = "1.0.140" + +[dev-dependencies] +tempfile = "3.20.0" diff --git a/src/extractor.rs b/src/extractor.rs index 6efa325..fbb7b92 100644 --- a/src/extractor.rs +++ b/src/extractor.rs @@ -84,3 +84,41 @@ impl Extractor { Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + + use std::fs::File; + use std::io::{Read, Write}; + + use tempfile::NamedTempFile; + + #[test] + fn test_extract() -> Result<(), Box> { + // Create a temporary file to simulate the corpus input + let mut corpus_file = NamedTempFile::new()?; + writeln!(corpus_file, "これ は テスト です 。")?; + writeln!(corpus_file, "別 の 文 も あり ます 。")?; + corpus_file.as_file().sync_all()?; + + // Create a temporary file for the features output + let features_file = NamedTempFile::new()?; + + // Create an instance of Extractor and extract features + let mut extractor = Extractor::new(); + extractor.extract(corpus_file.path(), features_file.path())?; + + // Read the output from the features file + let mut output = String::new(); + File::open(features_file.path())?.read_to_string(&mut output)?; + + // Check if the output is not empty + assert!(!output.is_empty(), "Extracted features should not be empty"); + + // Check if the output contains tab-separated values + assert!(output.contains("\t"), "Output should contain tab-separated values"); + + Ok(()) + } +}