From 6d8832c44f629828bffb3fb8436d927133ae8210 Mon Sep 17 00:00:00 2001
From: Minoru Osuka <minoru.osuka@gmail.com>
Date: Mon, 2 Jun 2025 23:38:50 +0900
Subject: [PATCH 01/15] Refactoring

---
 Cargo.toml       |   2 +-
 src/adaboost.rs  |  19 ++++----
 src/extractor.rs |  67 +++++++++++++++++++++++++++
 src/lib.rs       |   2 +
 src/main.rs      | 115 +++++++++++++++++------------------------------
 src/segmenter.rs |  27 +++++++----
 src/trainer.rs   |  48 ++++++++++++++++++++
 7 files changed, 186 insertions(+), 94 deletions(-)
 create mode 100644 src/extractor.rs
 create mode 100644 src/trainer.rs

diff --git a/Cargo.toml b/Cargo.toml
index 0652eae..ad4ee53 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,7 +12,7 @@ categories = ["text-processing"]
 license = "MIT"
 
 [features]
-default = [] # No directories included
+default = []
 
 [dependencies]
 clap = { version = "4.5.39", features = ["derive"] }
diff --git a/src/adaboost.rs b/src/adaboost.rs
index d2125f1..51b4fa3 100644
--- a/src/adaboost.rs
+++ b/src/adaboost.rs
@@ -1,6 +1,7 @@
 use std::collections::{BTreeMap, HashMap, HashSet};
 use std::fs::File;
 use std::io::{BufRead, BufReader, Write};
+use std::path::Path;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
 
@@ -53,7 +54,7 @@ impl AdaBoost {
     /// * `filename`: The path to the file containing the features.
     /// # Returns: A result indicating success or failure.
     /// # Errors: Returns an error if the file cannot be opened or read.
-    pub fn initialize_features(&mut self, filename: &str) -> std::io::Result<()> {
+    pub fn initialize_features(&mut self, filename: &Path) -> std::io::Result<()> {
         let file = File::open(filename)?;
         let reader = BufReader::new(file);
         let mut map = BTreeMap::new(); // preserve order
@@ -100,7 +101,7 @@ impl AdaBoost {
     /// * `filename`: The path to the file containing the instances.
     /// # Returns: A result indicating success or failure.
     /// # Errors: Returns an error if the file cannot be opened or read.
-    pub fn initialize_instances(&mut self, filename: &str) -> std::io::Result<()> {
+    pub fn initialize_instances(&mut self, filename: &Path) -> std::io::Result<()> {
         let file = File::open(filename)?;
         let reader = BufReader::new(file);
         let bias = self.get_bias();
@@ -173,7 +174,7 @@ impl AdaBoost {
             // Find the best hypothesis
             let mut h_best = 0;
             let mut best_error_rate = positive_weight_sum / instance_weight_sum;
-            for h in 1..num_features {
+            for (h, _) in errors.iter().enumerate().take(num_features).skip(1) {
                 let mut e = errors[h] + positive_weight_sum;
                 e /= instance_weight_sum;
                 if (0.5 - e).abs() > (0.5 - best_error_rate).abs() {
@@ -232,7 +233,7 @@ impl AdaBoost {
     /// # Errors: Returns an error if the file cannot be created or written to.
     /// # Notes: The bias term is calculated as the negative sum of the weights divided by 2.
     /// The model is saved in a way that can be easily loaded later.
-    pub fn save_model(&self, filename: &str) -> std::io::Result<()> {
+    pub fn save_model(&self, filename: &Path) -> std::io::Result<()> {
         let mut file = File::create(filename)?;
         let mut bias = -self.model[0];
         for (h, &w) in self.features.iter().zip(self.model.iter()).skip(1) {
@@ -254,7 +255,7 @@ impl AdaBoost {
     /// # Errors: Returns an error if the file cannot be opened or read.
     /// # Notes: The model is loaded into the `features` and `model` vectors,
     /// and the bias is calculated as the negative sum of the weights divided by 2.
-    pub fn load_model(&mut self, filename: &str) -> std::io::Result<()> {
+    pub fn load_model(&mut self, filename: &Path) -> std::io::Result<()> {
         let file = File::open(filename)?;
         let reader = BufReader::new(file);
         let mut m: HashMap<String, f64> = HashMap::new();
@@ -313,12 +314,10 @@ impl AdaBoost {
                 } else {
                     pn += 1
                 }
+            } else if label > 0 {
+                np += 1
             } else {
-                if label > 0 {
-                    np += 1
-                } else {
-                    nn += 1
-                }
+                nn += 1
             }
         }
 
diff --git a/src/extractor.rs b/src/extractor.rs
new file mode 100644
index 0000000..6af0529
--- /dev/null
+++ b/src/extractor.rs
@@ -0,0 +1,67 @@
+use std::collections::HashSet;
+use std::error::Error;
+use std::fs::File;
+use std::io::{self, BufRead, Write};
+use std::path::Path;
+
+use crate::segmenter::Segmenter;
+
+pub struct Extractor {
+    segmenter: Segmenter,
+}
+
+impl Default for Extractor {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl Extractor {
+    pub fn new() -> Self {
+        Extractor {
+            segmenter: Segmenter::new(None),
+        }
+    }
+
+    pub fn extract(
+        &mut self,
+        corpus_path: &Path,
+        features_path: &Path,
+    ) -> Result<(), Box<dyn Error>> {
+        // Read sentences from the corpus file
+        // Each line is treated as a separate sentence
+        let corpus_file = File::open(corpus_path)?;
+        let corpus = io::BufReader::new(corpus_file);
+
+        // Create a file to write the features
+        let features_file = File::create(features_path)?;
+        let mut features = io::BufWriter::new(features_file);
+
+        // learner function to write features
+        // This function will be called for each word in the input sentences
+        // It takes a set of attributes and a label, and writes them to the features file
+        let mut learner = |attributes: HashSet<String>, label: i8| {
+            let mut attrs: Vec<String> = attributes.into_iter().collect();
+            attrs.sort();
+            let mut line = vec![label.to_string()];
+            line.extend(attrs);
+            writeln!(features, "{}", line.join("\t")).expect("Failed to write features");
+        };
+
+        for line in corpus.lines() {
+            match line {
+                Ok(line) => {
+                    let line = line.trim();
+                    if !line.is_empty() {
+                        self.segmenter.add_sentence_with_writer(line, &mut learner);
+                    }
+                }
+                Err(err) => {
+                    eprintln!("Error reading input: {}", err);
+                }
+            }
+        }
+
+        Ok(())
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index feca1db..3d62349 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,7 @@
 pub mod adaboost;
+pub mod extractor;
 pub mod segmenter;
+pub mod trainer;
 
 const VERERSION: &str = env!("CARGO_PKG_VERSION");
 
diff --git a/src/main.rs b/src/main.rs
index 6bccaae..fcc5934 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,15 +1,16 @@
-use std::collections::HashSet;
 use std::error::Error;
-use std::fs::File;
 use std::io::{self, BufRead, Write};
+use std::path::PathBuf;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
 
 use clap::{Args, Parser, Subcommand};
 
 use litsea::adaboost::AdaBoost;
+use litsea::extractor::Extractor;
 use litsea::get_version;
 use litsea::segmenter::Segmenter;
+use litsea::trainer::Trainer;
 
 #[derive(Debug, Args)]
 #[clap(
@@ -18,8 +19,8 @@ use litsea::segmenter::Segmenter;
     version = get_version(),
 )]
 struct ExtractArgs {
-    corpus_file: String,
-    features_file: String,
+    corpus_file: PathBuf,
+    features_file: PathBuf,
 }
 
 #[derive(Debug, Args)]
@@ -38,10 +39,10 @@ struct TrainArgs {
     num_threads: usize,
 
     #[arg(short = 'm', long)]
-    load_model: Option<String>,
+    load_model_file: Option<PathBuf>,
 
-    instances_file: String,
-    model_file: String,
+    features_file: PathBuf,
+    model_file: PathBuf,
 }
 
 #[derive(Debug, Args)]
@@ -50,7 +51,7 @@ struct TrainArgs {
     version = get_version(),
 )]
 struct SegmentArgs {
-    model_file: String,
+    model_file: PathBuf,
 }
 
 #[derive(Debug, Subcommand)]
@@ -73,44 +74,11 @@ struct CommandArgs {
 }
 
 fn extract(args: ExtractArgs) -> Result<(), Box<dyn Error>> {
-    // Create a file to write the features
-    let features_file = File::create(&args.features_file)?;
-    let mut features = io::BufWriter::new(features_file);
-
-    // Initialize the segmenter
-    // No model is loaded, so it will use the default feature extraction
-    let mut segmenter = Segmenter::new(None);
-
-    // learner function to write features
-    // This function will be called for each word in the input sentences
-    // It takes a set of attributes and a label, and writes them to stdout
-    let mut learner = |attributes: HashSet<String>, label: i8| {
-        let mut attrs: Vec<String> = attributes.into_iter().collect();
-        attrs.sort();
-        let mut line = vec![label.to_string()];
-        line.extend(attrs);
-        writeln!(features, "{}", line.join("\t")).expect("Failed to write features");
-    };
-
-    // Read sentences from stdin
-    // Each line is treated as a separate sentence
-    let corpus_file = File::open(&args.corpus_file)?;
-    let corpus = io::BufReader::new(corpus_file);
-
-    for line in corpus.lines() {
-        match line {
-            Ok(line) => {
-                let line = line.trim();
-                if !line.is_empty() {
-                    segmenter.add_sentence_with_writer(line, &mut learner);
-                }
-            }
-            Err(err) => {
-                eprintln!("Error reading input: {}", err);
-            }
-        }
-    }
+    let mut extractor = Extractor::new();
+
+    extractor.extract(args.corpus_file.as_path(), args.features_file.as_path())?;
 
+    println!("Feature extraction completed successfully.");
     Ok(())
 }
 
@@ -127,56 +95,46 @@ fn train(args: TrainArgs) -> Result<(), Box<dyn Error>> {
     })
     .expect("Error setting Ctrl-C handler");
 
-    let mut boost = AdaBoost::new(args.threshold, args.num_iterations, args.num_threads);
+    let mut trainer = Trainer::new(
+        args.threshold,
+        args.num_iterations,
+        args.num_threads,
+        args.features_file.as_path(),
+    );
 
-    if let Some(model_path) = args.load_model.as_ref() {
-        boost.load_model(model_path).unwrap();
+    if let Some(model_path) = &args.load_model_file {
+        trainer.load_model(model_path.as_path())?;
     }
 
-    boost.initialize_features(&args.instances_file).unwrap();
-    boost.initialize_instances(&args.instances_file).unwrap();
-
-    boost.train(running.clone());
-    boost.save_model(&args.model_file).unwrap();
-    boost.show_result();
+    trainer.train(running, args.model_file.as_path())?;
 
+    println!("Training completed successfully.");
     Ok(())
 }
 
 fn segment(args: SegmentArgs) -> Result<(), Box<dyn Error>> {
-    let model_path = &args.model_file;
+    let mut leaner = AdaBoost::new(0.01, 100, 1);
+    leaner.load_model(args.model_file.as_path())?;
 
-    let mut model = AdaBoost::new(0.01, 100, 1);
-    if let Err(e) = model.load_model(model_path) {
-        eprintln!("Failed to load model: {}", e);
-        std::process::exit(1);
-    }
-
-    let segmenter = Segmenter::new(Some(model));
+    let segmenter = Segmenter::new(Some(leaner));
     let stdin = io::stdin();
     let stdout = io::stdout();
     let mut writer = io::BufWriter::new(stdout.lock());
 
     for line in stdin.lock().lines() {
-        match line {
-            Ok(line) => {
-                let line = line.trim();
-                if line.is_empty() {
-                    continue;
-                }
-                let tokens = segmenter.parse(line);
-                writeln!(writer, "{}", tokens.join(" ")).expect("write failed");
-            }
-            Err(err) => {
-                eprintln!("Error reading input: {}", err);
-            }
+        let line = line?;
+        let line = line.trim();
+        if line.is_empty() {
+            continue;
         }
+        let tokens = segmenter.parse(line);
+        writeln!(writer, "{}", tokens.join(" "))?;
     }
 
     Ok(())
 }
 
-fn main() -> Result<(), Box<dyn Error>> {
+fn run() -> Result<(), Box<dyn std::error::Error>> {
     let args = CommandArgs::parse();
 
     match args.command {
@@ -185,3 +143,10 @@ fn main() -> Result<(), Box<dyn Error>> {
         Commands::Segment(args) => segment(args),
     }
 }
+
+fn main() {
+    if let Err(e) = run() {
+        eprintln!("Error: {}", e);
+        std::process::exit(1);
+    }
+}
diff --git a/src/segmenter.rs b/src/segmenter.rs
index 18f2d91..faabbc6 100644
--- a/src/segmenter.rs
+++ b/src/segmenter.rs
@@ -10,8 +10,10 @@ pub struct Segmenter {
 
 impl Segmenter {
     /// Creates a new Segmenter with the given AdaBoost learner or a default one
+    ///
     /// # Arguments
     /// * `learner` - An optional AdaBoost instance. If None, a default AdaBoost instance is created.
+    ///
     /// # Returns
     /// A new Segmenter instance with the specified or default AdaBoost learner.
     pub fn new(learner: Option<AdaBoost>) -> Self {
@@ -32,9 +34,11 @@ impl Segmenter {
         }
     }
 
-    /// gets the type of a character based on predefined patterns
+    /// Gets the type of a character based on predefined patterns.
+    ///
     /// # Arguments
     /// * `ch` - A string slice representing a single character.
+    ///
     /// # Returns
     /// A static string representing the type of the character, such as "M", "H", "I", "K", "A", "N", or "O" (for others).
     pub fn get_type(&self, ch: &str) -> &'static str {
@@ -46,10 +50,11 @@ impl Segmenter {
         "O"
     }
 
-    /// Adds a sentence to the segmenter with a custom writer function
+    /// Adds a sentence to the segmenter with a custom writer function.
+    ///
     /// # Arguments
     /// * `sentence` - A string slice representing the sentence to be added.
-    /// * `writer` - A closure that takes a HashSet of attributes and a label (i8) as arguments.
+    /// * `writer` - A closure that takes a `HashSet<String>` of attributes and a label (`i8`) as arguments.
     ///   This closure is called for each word in the sentence, allowing custom handling of the attributes and label.
     pub fn add_sentence_with_writer<F>(&mut self, sentence: &str, mut writer: F)
     where
@@ -91,12 +96,14 @@ impl Segmenter {
         }
     }
 
-    /// Adds a sentence to the segmenter for training
+    /// Adds a sentence to the segmenter for training.
+    ///
     /// # Arguments
     /// * `sentence` - A string slice representing the sentence to be added.
+    ///
     /// This method processes the sentence, extracts features, and adds them to the AdaBoost learner.
     /// It constructs attributes based on the characters and their types, and uses the AdaBoost learner to add instances.
-    /// If the sentence is empty or too short, it does nothing.
+    ///   If the sentence is empty or too short, it does nothing.
     pub fn add_sentence(&mut self, sentence: &str) {
         if sentence.is_empty() {
             return;
@@ -130,14 +137,16 @@ impl Segmenter {
         for i in 4..(chars.len() - 3) {
             let label = if tags[i] == "B" { 1 } else { -1 };
             let attrs = self.get_attributes(i, &tags, &chars, &types);
-            // ★ ここで毎回 self.learner を呼ぶことで借用がぶつからない！
+            // Call the learner for each instance; doing so individually avoids borrowing conflicts.
             self.learner.add_instance(attrs, label);
         }
     }
 
-    /// Parses a sentence and segments it into words
+    /// Parses a sentence and segments it into words.
+    ///
     /// # Arguments
     /// * `sentence` - A string slice representing the sentence to be parsed.
+    ///
     /// # Returns
     /// A vector of strings, where each string is a segmented word from the sentence.
     pub fn parse(&self, sentence: &str) -> Vec<String> {
@@ -174,12 +183,14 @@ impl Segmenter {
         result
     }
 
-    /// Gets the attributes for a specific index in the character and type arrays
+    /// Gets the attributes for a specific index in the character and type arrays.
+    ///
     /// # Arguments
     /// * `i` - The index for which to get the attributes.
     /// * `tags` - A slice of strings representing the tags for each character.
     /// * `chars` - A slice of strings representing the characters in the sentence.
     /// * `types` - A slice of strings representing the types of each character.
+    ///
     /// # Returns
     /// A HashSet of strings representing the attributes for the specified index.
     fn get_attributes(
diff --git a/src/trainer.rs b/src/trainer.rs
new file mode 100644
index 0000000..101218d
--- /dev/null
+++ b/src/trainer.rs
@@ -0,0 +1,48 @@
+use std::path::Path;
+use std::sync::atomic::AtomicBool;
+use std::sync::Arc;
+
+use crate::adaboost::AdaBoost;
+
+pub struct Trainer {
+    learner: AdaBoost,
+}
+
+impl Trainer {
+    pub fn new(
+        threshold: f64,
+        num_iterations: usize,
+        num_threads: usize,
+        features_path: &Path,
+    ) -> Self {
+        let mut learner = AdaBoost::new(threshold, num_iterations, num_threads);
+
+        learner
+            .initialize_features(features_path)
+            .expect("Failed to initialize features");
+        learner
+            .initialize_instances(features_path)
+            .expect("Failed to initialize instances");
+
+        Trainer { learner }
+    }
+
+    pub fn load_model(&mut self, model_path: &Path) -> Result<(), Box<dyn std::error::Error>> {
+        // Load the model from the specified file
+        Ok(self.learner.load_model(model_path)?)
+    }
+
+    pub fn train(
+        &mut self,
+        running: Arc<AtomicBool>,
+        model_path: &Path,
+    ) -> Result<(), Box<dyn std::error::Error>> {
+        self.learner.train(running.clone());
+
+        // Save the trained model to the specified file
+        self.learner.save_model(model_path)?;
+        self.learner.show_result();
+
+        Ok(())
+    }
+}

From e81be7e13162d7357ead38d06793fcb9b50e8e78 Mon Sep 17 00:00:00 2001
From: Minoru Osuka <minoru.osuka@gmail.com>
Date: Tue, 3 Jun 2025 10:02:54 +0900
Subject: [PATCH 02/15] Add docs

---
 README.md        |  7 +++++++
 src/adaboost.rs  | 37 +++++++++++++++++++++++--------------
 src/extractor.rs | 19 +++++++++++++++++++
 src/main.rs      | 33 +++++++++++++++++++++++++++++++++
 src/segmenter.rs | 10 +++++++---
 src/trainer.rs   | 38 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 127 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 09bd431..727f0ed 100644
--- a/README.md
+++ b/README.md
@@ -64,6 +64,12 @@ Extract the information and features from the corpus:
 ./target/release/litsea extract ./resources/corpus.txt ./resources/features.txt
 ```
 
+The output from the `extract` command is similar to:
+
+```text
+Feature extraction completed successfully.
+```
+
 Train the features output by the above command using AdaBoost. Training stops if the new weak classifier’s accuracy falls below 0.001 or after 10,000 iterations.
 
 ```sh
@@ -81,6 +87,7 @@ Accuracy: 100.00% (61 / 61)
 Precision: 100.00% (24 / 24)
 Recall: 100.00% (24 / 24)
 Confusion Matrix: TP: 24, FP: 0, FN: 0, TN: 37
+Training completed successfully.
 ```
 
 ## How to segment sentences into words
diff --git a/src/adaboost.rs b/src/adaboost.rs
index 51b4fa3..cfa1188 100644
--- a/src/adaboost.rs
+++ b/src/adaboost.rs
@@ -11,7 +11,6 @@ type Label = i8;
 /// This implementation uses a simple feature extraction method
 /// and is designed for educational purposes.
 /// It is not optimized for performance or large datasets.
-///
 #[derive(Debug)]
 pub struct AdaBoost {
     pub threshold: f64,
@@ -27,11 +26,15 @@ pub struct AdaBoost {
 }
 
 impl AdaBoost {
-    /// Creates a new [`AdaBoost`].
+    /// Creates a new instance of [`AdaBoost`].
+    /// This method initializes the AdaBoost parameters such as threshold,
+    /// number of iterations, and number of threads.
+    ///
     /// # Arguments
     /// * `threshold`: The threshold for stopping the training.
     /// * `num_iterations`: The maximum number of iterations for training.
     /// * `num_threads`: The number of threads to use for training (not used in this implementation).
+    ///
     /// # Returns: A new instance of [`AdaBoost`].
     pub fn new(threshold: f64, num_iterations: usize, num_threads: usize) -> Self {
         AdaBoost {
@@ -50,9 +53,12 @@ impl AdaBoost {
 
     /// Initializes the features from a file.
     /// The file should contain lines with a label followed by space-separated features.
+    ///
     /// # Arguments
     /// * `filename`: The path to the file containing the features.
+    ///
     /// # Returns: A result indicating success or failure.
+    ///
     /// # Errors: Returns an error if the file cannot be opened or read.
     pub fn initialize_features(&mut self, filename: &Path) -> std::io::Result<()> {
         let file = File::open(filename)?;
@@ -97,9 +103,12 @@ impl AdaBoost {
 
     /// Initializes the instances from a file.
     /// The file should contain lines with a label followed by space-separated features.
+    ///
     /// # Arguments
     /// * `filename`: The path to the file containing the instances.
+    ///
     /// # Returns: A result indicating success or failure.
+    ///
     /// # Errors: Returns an error if the file cannot be opened or read.
     pub fn initialize_instances(&mut self, filename: &Path) -> std::io::Result<()> {
         let file = File::open(filename)?;
@@ -141,9 +150,9 @@ impl AdaBoost {
 
     /// Trains the AdaBoost model.
     /// This method iteratively updates the model based on the training data.
+    ///
     /// # Arguments
     /// * `running`: An `Arc<AtomicBool>` to control the running state of the training process.
-    /// # Notes: The training process will stop if `running` is set to false.
     pub fn train(&mut self, running: Arc<AtomicBool>) {
         let num_features = self.features.len();
 
@@ -227,12 +236,13 @@ impl AdaBoost {
     /// Saves the trained model to a file.
     /// The model is saved in a format where each line contains a feature and its weight,
     /// with the last line containing the bias term.
+    ///
     /// # Arguments
     /// * `filename`: The path to the file where the model will be saved.
+    ///
     /// # Returns: A result indicating success or failure.
+    ///
     /// # Errors: Returns an error if the file cannot be created or written to.
-    /// # Notes: The bias term is calculated as the negative sum of the weights divided by 2.
-    /// The model is saved in a way that can be easily loaded later.
     pub fn save_model(&self, filename: &Path) -> std::io::Result<()> {
         let mut file = File::create(filename)?;
         let mut bias = -self.model[0];
@@ -249,12 +259,13 @@ impl AdaBoost {
     /// Loads a model from a file.
     /// The file should contain lines with a feature and its weight,
     /// with the last line containing the bias term.
+    ///
     /// # Arguments
     /// * `filename`: The path to the file containing the model.
+    ///
     /// # Returns: A result indicating success or failure.
+    ///
     /// # Errors: Returns an error if the file cannot be opened or read.
-    /// # Notes: The model is loaded into the `features` and `model` vectors,
-    /// and the bias is calculated as the negative sum of the weights divided by 2.
     pub fn load_model(&mut self, filename: &Path) -> std::io::Result<()> {
         let file = File::open(filename)?;
         let reader = BufReader::new(file);
@@ -283,16 +294,14 @@ impl AdaBoost {
 
     /// Gets the bias term of the model.
     /// The bias is calculated as the negative sum of the model weights divided by 2.
-    /// # Returns: The bias term as a `f64`.
-    /// # Notes: This is used to adjust the decision boundary of the model.
+    ///
+    /// # Returns: The bias term as a `f64`.
     pub fn get_bias(&self) -> f64 {
         -self.model.iter().sum::<f64>() / 2.0
     }
 
     /// Displays the result of the model's performance on the training data.
     /// It calculates accuracy, precision, recall, and confusion matrix.
-    /// # Notes: This method iterates through the instances, calculates the score for each,
-    /// and counts true positives, false positives, true negatives, and false negatives.
     pub fn show_result(&self) {
         let bias = self.get_bias();
         let mut pp = 0;
@@ -342,11 +351,10 @@ impl AdaBoost {
 
     /// Adds a new instance to the model.
     /// The instance is represented by a set of attributes and a label.
+    ///
     /// # Arguments
     /// * `attributes`: A `HashSet<String>` containing the attributes of the instance.
     /// * `label`: The label of the instance, represented as an `i8`.
-    /// # Notes: The attributes are sorted and added to the `features` vector if they do not already exist.
-    /// The instance is stored in `instances_buf`, and its start and end indices are recorded in `instances`.
     pub fn add_instance(&mut self, attributes: HashSet<String>, label: i8) {
         let start = self.instances_buf.len();
         let mut attrs: Vec<String> = attributes.into_iter().collect();
@@ -369,10 +377,11 @@ impl AdaBoost {
     }
 
     /// Predicts the label for a given set of attributes.
+    ///
     /// # Arguments
     /// * `attributes`: A `HashSet<String>` containing the attributes to predict.
+    ///
     /// # Returns: The predicted label as an `i8`, where 1 indicates a positive prediction and -1 indicates a negative prediction.
-    /// # Notes: The prediction is made by calculating the score based on the model weights for the given attributes.
     pub fn predict(&self, attributes: HashSet<String>) -> i8 {
         let mut score = 0.0;
         for attr in attributes {
diff --git a/src/extractor.rs b/src/extractor.rs
index 6af0529..6efa325 100644
--- a/src/extractor.rs
+++ b/src/extractor.rs
@@ -6,23 +6,42 @@ use std::path::Path;
 
 use crate::segmenter::Segmenter;
 
+/// Extractor struct for processing text data and extracting features.
+/// It reads sentences from a corpus file, segments them into words,
+/// and writes the extracted features to a specified output file.
 pub struct Extractor {
     segmenter: Segmenter,
 }
 
 impl Default for Extractor {
+    /// Creates a new instance of [`Extractor`] with default settings.
+    ///
+    /// # Returns
+    /// Returns a new instance of `Extractor`.
     fn default() -> Self {
         Self::new()
     }
 }
 
 impl Extractor {
+    /// Creates a new instance of [`Extractor`].
+    ///
+    /// # Returns
+    /// Returns a new instance of `Extractor` with a new `Segmenter`.
     pub fn new() -> Self {
         Extractor {
             segmenter: Segmenter::new(None),
         }
     }
 
+    /// Extracts features from a corpus file and writes them to a specified output file.
+    ///
+    /// # Arguments
+    /// * `corpus_path` - The path to the input corpus file containing sentences.
+    /// * `features_path` - The path to the output file where extracted features will be written.
+    ///
+    /// # Returns
+    /// Returns a Result indicating success or failure.
     pub fn extract(
         &mut self,
         corpus_path: &Path,
diff --git a/src/main.rs b/src/main.rs
index fcc5934..e31f697 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -12,6 +12,7 @@ use litsea::get_version;
 use litsea::segmenter::Segmenter;
 use litsea::trainer::Trainer;
 
+/// Arguments for the extract command.
 #[derive(Debug, Args)]
 #[clap(
     author,
@@ -23,6 +24,7 @@ struct ExtractArgs {
     features_file: PathBuf,
 }
 
+/// Arguments for the train command.
 #[derive(Debug, Args)]
 #[clap(author,
     about = "Train a segmenter",
@@ -45,6 +47,7 @@ struct TrainArgs {
     model_file: PathBuf,
 }
 
+/// Arguments for the segment command.
 #[derive(Debug, Args)]
 #[clap(author,
     about = "Segment a sentence",
@@ -54,6 +57,7 @@ struct SegmentArgs {
     model_file: PathBuf,
 }
 
+/// Subcommands for the litsea CLI.
 #[derive(Debug, Subcommand)]
 enum Commands {
     Extract(ExtractArgs),
@@ -61,6 +65,7 @@ enum Commands {
     Segment(SegmentArgs),
 }
 
+/// Arguments for the litsea command.
 #[derive(Debug, Parser)]
 #[clap(
     name = "litsea",
@@ -73,6 +78,15 @@ struct CommandArgs {
     command: Commands,
 }
 
+/// Extract features from a corpus file and write them to a specified output file.
+/// This function reads sentences from the corpus file, segments them into words,
+/// and writes the extracted features to the output file.
+///
+/// # Arguments
+/// * `args` - The arguments for the extract command [`ExtractArgs`].
+///
+/// # Returns
+/// Returns a Result indicating success or failure.
 fn extract(args: ExtractArgs) -> Result<(), Box<dyn Error>> {
     let mut extractor = Extractor::new();
 
@@ -82,6 +96,15 @@ fn extract(args: ExtractArgs) -> Result<(), Box<dyn Error>> {
     Ok(())
 }
 
+/// Train a segmenter using the provided arguments.
+/// This function initializes a Trainer with the specified parameters,
+/// loads a model if specified, and trains the model using the features file.
+///
+/// # Arguments
+/// * `args` - The arguments for the train command [`TrainArgs`].
+///
+/// # Returns
+/// Returns a Result indicating success or failure.
 fn train(args: TrainArgs) -> Result<(), Box<dyn Error>> {
     let running = Arc::new(AtomicBool::new(true));
     let r = running.clone();
@@ -112,6 +135,16 @@ fn train(args: TrainArgs) -> Result<(), Box<dyn Error>> {
     Ok(())
 }
 
+/// Segment a sentence using the trained model.
+/// This function loads the AdaBoost model from the specified file,
+/// reads sentences from standard input, segments them into words,
+/// and writes the segmented sentences to standard output.
+///
+/// # Arguments
+/// * `args` - The arguments for the segment command [`SegmentArgs`].
+///
+/// # Returns
+/// Returns a Result indicating success or failure.
 fn segment(args: SegmentArgs) -> Result<(), Box<dyn Error>> {
     let mut leaner = AdaBoost::new(0.01, 100, 1);
     leaner.load_model(args.model_file.as_path())?;
diff --git a/src/segmenter.rs b/src/segmenter.rs
index faabbc6..2a58084 100644
--- a/src/segmenter.rs
+++ b/src/segmenter.rs
@@ -3,13 +3,14 @@ use regex::Regex;
 use std::collections::HashSet;
 
 /// Segmenter struct for text segmentation using AdaBoost
+/// It uses predefined patterns to classify characters and segments sentences into words.
 pub struct Segmenter {
     patterns: Vec<(Regex, &'static str)>,
     pub learner: AdaBoost,
 }
 
 impl Segmenter {
-    /// Creates a new Segmenter with the given AdaBoost learner or a default one
+    /// Creates a new instance of [`Segmenter`].
     ///
     /// # Arguments
     /// * `learner` - An optional AdaBoost instance. If None, a default AdaBoost instance is created.
@@ -55,7 +56,10 @@ impl Segmenter {
     /// # Arguments
     /// * `sentence` - A string slice representing the sentence to be added.
     /// * `writer` - A closure that takes a `HashSet<String>` of attributes and a label (`i8`) as arguments.
-    ///   This closure is called for each word in the sentence, allowing custom handling of the attributes and label.
+    ///
+    /// This closure is called for each instance created from the sentence.
+    /// This method processes the sentence, extracts features, and calls the writer function for each instance.
+    /// It constructs attributes based on the characters and their types and passes them, with the label, to the writer.
     pub fn add_sentence_with_writer<F>(&mut self, sentence: &str, mut writer: F)
     where
         F: FnMut(HashSet<String>, i8),
@@ -103,7 +107,7 @@ impl Segmenter {
     ///
     /// This method processes the sentence, extracts features, and adds them to the AdaBoost learner.
     /// It constructs attributes based on the characters and their types, and uses the AdaBoost learner to add instances.
-    ///   If the sentence is empty or too short, it does nothing.
+    /// If the sentence is empty or too short, it does nothing.
     pub fn add_sentence(&mut self, sentence: &str) {
         if sentence.is_empty() {
             return;
diff --git a/src/trainer.rs b/src/trainer.rs
index 101218d..dd76a94 100644
--- a/src/trainer.rs
+++ b/src/trainer.rs
@@ -4,11 +4,28 @@ use std::sync::Arc;
 
 use crate::adaboost::AdaBoost;
 
+/// Trainer struct for managing the AdaBoost training process.
+/// It initializes the AdaBoost learner with the specified parameters,
+/// loads the model from a file, and provides methods to train the model
+/// and save the trained model.
 pub struct Trainer {
     learner: AdaBoost,
 }
 
 impl Trainer {
+    /// Creates a new instance of [`Trainer`].
+    ///
+    /// # Arguments
+    /// * `threshold` - The threshold for the AdaBoost algorithm.
+    /// * `num_iterations` - The number of iterations for the training.
+    /// * `num_threads` - The number of threads to use for training.
+    /// * `features_path` - The path to the features file.
+    ///
+    /// # Returns
+    /// Returns a new instance of `Trainer`.
+    ///
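+    /// # Panics
+    /// NOTE(review): `new` returns `Trainer` directly rather than a `Result`, so feature/instance initialization failures cannot be returned as errors — confirm whether they panic.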
     pub fn new(
         threshold: f64,
         num_iterations: usize,
@@ -27,11 +44,32 @@ impl Trainer {
         Trainer { learner }
     }
 
+    /// Loads the model from a file.
+    ///
+    /// # Arguments
+    /// * `model_path` - The path to the model file to load.
+    ///
+    /// # Returns
+    /// Returns a Result indicating success or failure.
+    ///
+    /// # Errors
+    /// Returns an error if the model cannot be loaded.
     pub fn load_model(&mut self, model_path: &Path) -> Result<(), Box<dyn std::error::Error>> {
         // Load the model from the specified file
         Ok(self.learner.load_model(model_path)?)
     }
 
+    /// Trains the AdaBoost model.
+    ///
+    /// # Arguments
+    /// * `running` - An `Arc<AtomicBool>` to control the running state of the training process.
+    /// * `model_path` - The path to save the trained model.
+    ///
+    /// # Returns
+    /// Returns a Result indicating success or failure.
+    ///
+    /// # Errors
+    /// Returns an error if the training fails or if the model cannot be saved.
     pub fn train(
         &mut self,
         running: Arc<AtomicBool>,

From e48c93e37db89140ea61b6d50aff826974b11590 Mon Sep 17 00:00:00 2001
From: Minoru OSUKA <minoru.osuka@gmail.com>
Date: Tue, 3 Jun 2025 11:47:30 +0900
Subject: [PATCH 03/15] Add tests (#3)

* Add tests

* Update doc

* Rename
---
 src/segmenter.rs | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/src/segmenter.rs b/src/segmenter.rs
index 2a58084..ec613a8 100644
--- a/src/segmenter.rs
+++ b/src/segmenter.rs
@@ -29,6 +29,7 @@ impl Segmenter {
             (Regex::new(r"[a-zA-Zａ-ｚＡ-Ｚ]").unwrap(), "A"),
             (Regex::new(r"[0-9０-９]").unwrap(), "N"),
         ];
+
         Segmenter {
             patterns,
             learner: learner.unwrap_or_else(|| AdaBoost::new(0.01, 100, 1)),
@@ -42,13 +43,13 @@ impl Segmenter {
     ///
     /// # Returns
     /// A static string representing the type of the character, such as "M", "H", "I", "K", "A", "N", or "O" (for others).
-    pub fn get_type(&self, ch: &str) -> &'static str {
-        for (pattern, s_type) in &self.patterns {
+    pub fn get_type(&self, ch: &str) -> &str {
+        for (pattern, label) in &self.patterns {
             if pattern.is_match(ch) {
-                return s_type;
+                return label;
             }
         }
-        "O"
+        "O" // Other
     }
 
     /// Adds a sentence to the segmenter with a custom writer function.
@@ -269,3 +270,36 @@ impl Segmenter {
         .collect()
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use std::path::PathBuf;
+
+    use super::*;
+
+    #[test]
+    fn test_segmenter() {
+        let model_file = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+            .join("./resources")
+            .join("RWCP.model");
+
+        let mut learner = AdaBoost::new(0.01, 100, 1);
+        learner.load_model(model_file.as_path()).unwrap();
+
+        let mut segmenter = Segmenter::new(Some(learner));
+        let sentence = "これはテストです。";
+        segmenter.add_sentence(sentence);
+        let result = segmenter.parse(sentence);
+        assert!(!result.is_empty());
+        assert_eq!(result.len(), 5); // Adjust based on expected segmentation
+    }
+
+    #[test]
+    fn test_get_type() {
+        let segmenter = Segmenter::new(None);
+        assert_eq!(segmenter.get_type("あ"), "I"); // Hiragana
+        assert_eq!(segmenter.get_type("漢"), "H"); // Kanji
+        assert_eq!(segmenter.get_type("A"), "A"); // Latin
+        assert_eq!(segmenter.get_type("1"), "N"); // Digit
+    }
+}

From 6ad062567c76d1e11fe7662741b20633f69b33b2 Mon Sep 17 00:00:00 2001
From: Minoru OSUKA <minoru.osuka@gmail.com>
Date: Tue, 3 Jun 2025 19:25:02 +0900
Subject: [PATCH 04/15] Add rustfmt.toml (#4)

* Add rustfmt.toml

* Fix regex patterns
---
 resources/RWCP.model.bak | 1341 ++++++++++++++++++++++++++++++++++++++
 rustfmt.toml             |   80 +++
 src/adaboost.rs          |   37 +-
 src/segmenter.rs         |   36 +-
 4 files changed, 1457 insertions(+), 37 deletions(-)
 create mode 100644 resources/RWCP.model.bak
 create mode 100644 rustfmt.toml

diff --git a/resources/RWCP.model.bak b/resources/RWCP.model.bak
new file mode 100644
index 0000000..eef6ffa
--- /dev/null
+++ b/resources/RWCP.model.bak
@@ -0,0 +1,1341 @@
+-0.0332
+UW6:ン	-0.0496
+UW6:連	0.0463
+UW6:ル	-0.0673
+UW6:か	0.0241
+UW6:業	-0.0697
+UW6:,	0.0227
+UW6:者	0.1811
+UW6:.	0.0808
+UW6:福	0.0974
+UW6:後	0.0535
+UW6:広	-0.0695
+UW6:ﾙ	-0.0673
+UW6:Ｅ１	0.0306
+UW6:ﾝ	-0.0496
+UW6:員	-0.1212
+UW6:に	-0.0149
+UW6:学	-0.096
+UW6:郎	0.1082
+UW6:E1	0.0306
+UW6:相	0.0753
+UW6:も	-0.0206
+UW6:り	0.0187
+UW6:る	-0.0135
+UW6:社	-0.0507
+UW6:を	0.0195
+UW6:な	-0.0253
+UW6:中	0.0201
+UW6:と	-0.0105
+UW6:は	-0.0236
+UW6:の	-0.0417
+UW6:っ	0.0573
+UW6:で	0.0101
+UW6:て	-0.1014
+UW6:す	0.0383
+UW6:じ	0.1782
+UW6:た	-0.0428
+UW6:こ	-0.02
+UW6:会	0.0624
+UW6:空	-0.0822
+UW6:件	-0.08
+UW6:く	-0.0121
+UW6:が	-0.0073
+UW6:あ	-0.0307
+UW6:前	0.0302
+UW6:う	0.0189
+UW6:一	-0.0277
+UW6:１	-0.027
+UW6:市	0.0887
+UW6:委	0.0798
+UW6:区	0.1792
+UW6:1	-0.027
+UW6:、	0.0227
+UW6:。	0.0808
+BW1:には	0.1498
+BW1:Ｂ１あ	0.1404
+BW1:ため	0.0601
+BW1:大阪	0.1497
+BW1:に対	-0.0912
+BW1:引き	-0.1336
+BW1:から	0.3472
+BW1:れた	0.2369
+BW1:うん	0.0665
+BW1:,同	0.0727
+BW1:毎日	-0.2113
+BW1:やむ	-0.1947
+BW1:です	0.3445
+BW1:まで	0.1711
+BW1:いる	0.0672
+BW1:、同	0.0727
+BW1:｣と	0.1682
+BW1:、と	0.066
+BW1:よっ	-0.2565
+BW1:なっ	0.3015
+BW1:日本	-0.0195
+BW1:すで	-0.3399
+BW1:平方	-0.2314
+BW1:れで	-0.0913
+BW1:とい	-0.4915
+BW1:ませ	0.2448
+BW1:つい	-0.0802
+BW1:を見	0.0731
+BW1:てき	0.1249
+BW1:それ	-0.0871
+BW1:こん	-0.1262
+BW1:でき	0.1127
+BW1:をし	0.186
+BW1:てい	0.0805
+BW1:大き	-0.2604
+BW1:B1あ	0.1404
+BW1:の中	0.0741
+BW1:して	0.1104
+BW1:では	0.0844
+BW1:さら	-0.4143
+BW1:どこ	0.3887
+BW1:京都	0.2558
+BW1:いう	0.1743
+BW1:うし	-0.4817
+BW1:ない	0.5713
+BW1:にし	0.2468
+BW1:まま	0.26
+BW1:あっ	0.1505
+BW1:の一	-0.0501
+BW1:亡く	-0.1886
+BW1:こと	0.2083
+BW1:った	0.3463
+BW1:さん	0.4573
+BW1:にも	0.1671
+BW1:なん	-0.1113
+BW1:Ｂ１同	0.0542
+BW1:とみ	0.1922
+BW1:」と	0.1682
+BW1:いっ	-0.2055
+BW1:たち	0.1122
+BW1:本当	-0.2423
+BW1:,と	0.066
+BW1:がら	0.06
+BW1:こう	-0.079
+BW1:取り	-0.2784
+BW1:目指	-0.0724
+BW1:した	0.2641
+BW1:B1同	0.0542
+BW1:そこ	0.1977
+BW1:まる	-0.2155
+BW1:など	0.7379
+BW2:との	0.072
+BW2:われ	0.7901
+BW2:とと	-0.2279
+BW2:新聞	-0.4066
+BW2:を通	-1.1877
+BW2:とみ	0.5168
+BW2:朝鮮	-0.2355
+BW2:大阪	-0.2471
+BW2:同党	0.097
+BW2:とい	0.189
+BW2:がい	0.0853
+BW2:とこ	-0.1746
+BW2:11	-0.0669
+BW2:米国	-0.4268
+BW2:れて	0.0849
+BW2:日新	-0.0722
+BW2:れば	0.4114
+BW2:うか	0.249
+BW2:くな	-0.1597
+BW2:に関	-1.1388
+BW2:こと	-0.8392
+BW2:かし	-0.135
+BW2:この	-0.4193
+BW2:なん	0.3099
+BW2:府県	-0.2363
+BW2:にし	0.2748
+BW2:会社	-0.1116
+BW2:同日	-0.0913
+BW2:れた	0.427
+BW2:かも	-0.0602
+BW2:にな	0.2454
+BW2:然と	-0.1384
+BW2:に対	-1.4943
+BW2:から	-0.7194
+BW2:上が	-0.4479
+BW2:かれ	0.4612
+BW2:んな	-0.4115
+BW2:ては	-0.311
+BW2:めて	-0.3153
+BW2:んだ	0.0728
+BW2:らか	-0.0944
+BW2:一方	-0.1375
+BW2:にお	-0.1615
+BW2:分の	-0.7758
+BW2:まし	-0.1316
+BW2:てき	0.364
+BW2:てく	0.2551
+BW2:てい	0.6144
+BW2:らに	-0.1897
+BW2:手権	-0.1982
+BW2:一日	0.097
+BW2:りし	0.0651
+BW2:はず	-0.2532
+BW2:いう	-0.1609
+BW2:少な	-0.105
+BW2:はが	-0.1033
+BW2:はい	0.1073
+BW2:ろう	0.6067
+BW2:でも	-0.4203
+BW2:りま	0.162
+BW2:日本	-0.7068
+BW2:社会	-0.1276
+BW2:らし	-0.1611
+BW2:もの	-1.0713
+BW2:させ	0.4533
+BW2:..	-1.1822
+BW2:に従	-0.4688
+BW2:東京	-0.1543
+BW2:もい	0.223
+BW2:され	1.3168
+BW2:その	-0.3744
+BW2:たい	-0.1253
+BW2:たた	-0.0662
+BW2:一部	-0.1051
+BW2:ばれ	0.1813
+BW2:委員	-0.125
+BW2:った	0.4589
+BW2:さん	-0.3977
+BW2:たは	-0.0939
+BW2:一人	0.0602
+BW2:って	0.1647
+BW2:たと	0.1224
+BW2:っと	-0.2094
+BW2:たち	-0.0786
+BW2:ただ	-0.3857
+BW2:立て	-0.099
+BW2:まれ	0.5409
+BW2:出て	0.2163
+BW2:ても	-0.3065
+BW2:まで	-0.6621
+BW2:１１	-0.0669
+BW2:年度	-0.8669
+BW2:なの	0.2614
+BW2:など	-0.6509
+BW2:でし	-0.3828
+BW2:第に	-0.1612
+BW2:曜日	-0.0601
+BW2:です	-0.4761
+BW2:なが	-0.1313
+BW2:でい	0.2666
+BW2:ない	-0.2488
+BW2:でき	-0.1528
+BW2:して	0.0972
+BW2:きた	0.1941
+BW2:ので	-0.7059
+BW2:のに	-0.6041
+BW2:しな	0.0939
+BW2:のの	-0.6125
+BW2:本人	-0.2697
+BW2:――	-0.573
+BW2:しい	-0.1819
+BW2:によ	-0.7236
+BW2:のか	0.2093
+BW2:しか	-0.0545
+BW2:年間	-0.1626
+BW2:がら	-0.3198
+BW2:とも	-0.3941
+BW2:−−	-1.3175
+BW2:した	0.5078
+BW2:日米	0.3372
+BW3:との	0.0541
+BW3:われ	-0.0605
+BW3:だ。	0.4098
+BW3:す.	-0.131
+BW3:い。	-0.1185
+BW3:か。	0.2857
+BW3:がっ	-0.0913
+BW3:がけ	-0.1127
+BW3:とう	-0.1387
+BW3:ず、	0.3426
+BW3:新聞	-0.5055
+BW3:日,	0.0974
+BW3:とし	0.2266
+BW3:会議	0.086
+BW3:がき	-0.4855
+BW3:れて	0.1375
+BW3:する	0.6521
+BW3:ず,	0.3426
+BW3:に,	-0.1021
+BW3:られ	0.682
+BW3:かけ	-0.0743
+BW3:こと	0.7397
+BW3:この	0.1542
+BW3:かに	-0.0669
+BW3:かっ	-0.4098
+BW3:てい	0.624
+BW3:が、	0.1816
+BW3:うち	0.1117
+BW3:れた	0.185
+BW3:にな	0.1906
+BW3:には	0.2644
+BW3:かり	-0.267
+BW3:から	0.652
+BW3:は、	0.1337
+BW3:まっ	-0.1549
+BW3:まで	0.6154
+BW3:んで	0.0798
+BW3:んだ	0.0606
+BW3:うと	0.4798
+BW3:ころ	-0.2757
+BW3:ます	0.6943
+BW3:てお	0.0855
+BW3:入り	0.1232
+BW3:にし	0.1771
+BW3:いえ	0.2079
+BW3:す。	-0.131
+BW3:いく	0.3029
+BW3:ずに	0.0841
+BW3:いい	0.5308
+BW3:るる	0.3818
+BW3:れ、	0.0854
+BW3:いた	0.2056
+BW3:の子	-0.1
+BW3:だっ	0.1004
+BW3:いっ	0.1883
+BW3:ｶ月	0.099
+BW3:か.	0.2857
+BW3:けど	0.1374
+BW3:た。	0.8875
+BW3:社会	0.2024
+BW3:さい	-0.0714
+BW3:らし	0.1479
+BW3:い.	-0.1185
+BW3:始め	0.1681
+BW3:の、	-0.0724
+BW3:が,	0.1816
+BW3:たい	-0.0594
+BW3:った	-0.4748
+BW3:さを	0.0976
+BW3:たの	0.0812
+BW3:日、	0.0974
+BW3:って	0.03
+BW3:べき	0.2181
+BW3:の,	-0.0724
+BW3:に、	-0.1021
+BW3:そう	0.0428
+BW3:カ月	0.099
+BW3:まれ	-0.0793
+BW3:ても	0.0302
+BW3:大会	0.2217
+BW3:たり	-0.1183
+BW3:たる	-0.0853
+BW3:では	0.2295
+BW3:など	0.2135
+BW3:いる	0.56
+BW3:し、	0.1557
+BW3:いわ	0.1527
+BW3:た.	0.8875
+BW3:ある	0.3846
+BW3:あり	0.0719
+BW3:れる	0.1091
+BW3:でに	-0.1482
+BW3:は,	0.1337
+BW3:です	0.1437
+BW3:なく	-0.0903
+BW3:ない	0.1796
+BW3:して	0.1449
+BW3:市	0.0965
+BW3:きた	0.1645
+BW3:しな	0.2608
+BW3:れ,	0.0854
+BW3:どう	0.4664
+BW3:しま	0.12
+BW3:まし	0.1113
+BW3:だ.	0.4098
+BW3:しい	-0.3714
+BW3:し,	0.1557
+BW3:えと	0.1454
+BW3:れば	-0.3246
+BW3:あた	-0.2194
+BW3:がり	-0.2064
+BW3:がら	-0.4977
+BW3:とも	-0.3543
+BW3:した	0.3562
+UC3:A	-0.137
+UC3:I	0.2311
+TW4:からな	-0.2348
+TW4:ません	0.1097
+TW4:という	0.1349
+TW4:ました	0.5543
+TW4:ようと	-0.4258
+TW4:たが、	0.1516
+TW4:してい	0.2958
+TW4:たが,	0.1516
+TW4:ている	0.1538
+TW4:いう。	0.8576
+TW4:いう.	0.8576
+TW4:よると	0.5865
+UC1:A	0.0484
+UC1:K	0.0093
+UC1:M	0.0645
+UC1:O	-0.0505
+UC6:I	-0.0253
+UC6:H	-0.0506
+UC6:K	0.0087
+UC6:M	0.0247
+UC6:O	-0.0387
+UW3:・	-0.3794
+UW3:調	-0.0562
+UW3:ン	0.0278
+UW3:ロ	0.2201
+UW3:ル	0.1591
+UW3:度	0.1452
+UW3:非	0.2066
+UW3:ム	0.1109
+UW3:府	0.1605
+UW3:ト	0.0521
+UW3:く	0.1004
+UW3:ッ	-0.135
+UW3:広	-0.103
+UW3:李	0.3094
+UW3:部	0.12
+UW3:予	-0.1193
+UW3:郡	0.4404
+UW3:二	0.0974
+UW3:法	0.1868
+UW3:員	0.4513
+UW3:森	0.2438
+UW3:村	0.0364
+UW3:郎	0.1026
+UW3:ｸﾞ	0.1319
+UW3:力	0.0365
+UW3:い	0.1006
+UW3:東	-0.0805
+UW3:ほ	-0.5516
+UW3:へ	0.1199
+UW3:主	-0.0758
+UW3:ま	-0.4384
+UW3:ひ	-0.2171
+UW3:共	-0.188
+UW3:ふ	-0.1798
+UW3:開	-0.1432
+UW3:六	0.0755
+UW3:公	-0.303
+UW3:中	0.0653
+UW3:と	0.1691
+UW3:は	0.4555
+UW3:全	0.1574
+UW3:間	0.1302
+UW3:っ	-0.1444
+UW3:ち	-0.0521
+UW3:型	0.1389
+UW3:で	0.2318
+UW3:て	0.6167
+UW3:両	0.3815
+UW3:つ	-0.1081
+UW3:せ	0.3685
+UW3:す	0.0584
+UW3:た	0.0842
+UW3:そ	-0.5228
+UW3:党	0.3593
+UW3:こ	-0.3552
+UW3:げ	0.0401
+UW3:け	0.0388
+UW3:し	-0.0395
+UW3:さ	-0.1058
+UW3:ご	-0.3116
+UW3:か	-0.1163
+UW3:お	-0.4864
+UW3:え	0.1983
+UW3:下	-0.1759
+UW3:が	0.3271
+UW3:あ	-0.2696
+UW3:う	0.2342
+UW3:元	0.4858
+UW3:一	-0.1619
+UW3:政	-0.2013
+UW3:区	0.4646
+UW3:税	0.0401
+UW3:系	0.3066
+UW3:化	0.1327
+UW3:北	-0.1038
+UW3:口	0.0483
+UW3:右	0.1233
+UW3:駅	0.162
+UW3:戸	-0.0488
+UW3:知	-0.1528
+UW3:−	-0.1723
+UW3:妻	0.2016
+UW3:金	0.2163
+UW3:込	-0.1504
+UW3:無	0.0979
+UW3:よ	-0.0202
+UW3:わ	-0.1207
+UW3:を	0.662
+UW3:学	-0.1356
+UW3:当	-0.3885
+UW3:保	-0.2439
+UW3:再	0.3095
+UW3:円	0.5807
+UW3:約	0.3663
+UW3:的	0.7313
+UW3:級	0.1384
+UW3:ｱ	0.0551
+UW3:ｽ	0.0874
+UW3:1	-0.08
+UW3:･	-0.3794
+UW3:ｯ	-0.135
+UW3:市	0.3197
+UW3:用	0.0914
+UW3:能	0.0725
+UW3:別	0.1129
+UW3:昨	-0.0661
+UW3:町	0.1215
+UW3:何	0.4265
+UW3:初	0.2475
+UW3:作	-0.0361
+UW3:決	-0.1073
+UW3:低	0.0811
+UW3:生	-0.0273
+UW3:月	0.4125
+UW3:数	0.3222
+UW3:最	-0.0937
+UW3:選	-0.0681
+UW3:雨	0.2009
+UW3:立	-0.096
+UW3:期	0.036
+UW3:電	-0.1045
+UW3:｣	0.267
+UW3:費	0.1777
+UW3:業	0.0484
+UW3:,	0.4889
+UW3:者	0.6457
+UW3:教	-0.1479
+UW3:務	-0.1872
+UW3:動	-0.0949
+UW3:財	-0.0733
+UW3:指	-0.3973
+UW3:車	0.1835
+UW3:軍	0.1375
+UW3:国	0.0642
+UW3:統	-0.4229
+UW3:直	-0.1835
+UW3:日	0.2099
+UW3:旧	0.5792
+UW3:千	-0.2309
+UW3:午	-0.0783
+UW3:協	-0.1006
+UW3:外	-0.0241
+UW3:建	-0.2352
+UW3:特	-0.385
+UW3:自	-0.2869
+UW3:物	0.0461
+UW3:平	-0.1804
+UW3:海	-0.0495
+UW3:人	0.2742
+UW3:〓	-0.3573
+UW3:」	0.267
+UW3:、	0.4889
+UW3:々	-0.2311
+UW3:長	0.0421
+UW3:〇	0.5827
+UW3:思	-0.1291
+UW3:安	-0.0423
+UW3:州	0.1155
+UW3:み	-0.012
+UW3:実	-0.1008
+UW3:得	0.1905
+UW3:通	-0.1136
+UW3:性	0.1822
+UW3:同	0.3906
+UW3:合	-0.0241
+UW3:各	0.3588
+UW3:時	-0.1248
+UW3:ﾛ	0.2201
+UW3:ﾙ	0.1591
+UW3:家	0.1078
+UW3:ﾝ	0.0278
+UW3:ﾑ	0.1109
+UW3:見	0.1044
+UW3:ﾄ	0.0521
+UW3:新	0.1764
+UW3:に	0.2745
+UW3:な	-0.2788
+UW3:文	-0.1489
+UW3:ど	-0.0899
+UW3:米	0.7767
+UW3:の	0.4056
+UW3:も	0.2323
+UW3:め	0.1205
+UW3:や	-0.0788
+UW3:り	0.0649
+UW3:る	0.5905
+UW3:氏	0.2613
+UW3:ら	0.0727
+UW3:今	0.0792
+UW3:核	0.5156
+UW3:れ	0.2773
+UW3:他	0.1889
+UW3:ん	-0.0518
+UW3:民	-0.1694
+UW3:場	0.1219
+UW3:副	0.4437
+UW3:ア	0.0551
+UW3:分	0.0457
+UW3:以	-0.1368
+UW3:曜	-0.0951
+UW3:グ	0.1319
+UW3:年	0.2416
+UW3:和	-0.0837
+UW3:県	0.6293
+UW3:ス	0.0874
+UW3:前	0.2286
+UW3:１	-0.08
+UW3:総	0.1163
+UW3:少	-0.3102
+UW3:小	-0.0513
+UW3:線	0.1255
+UW3:第	0.1201
+UW3:関	-0.1282
+UW3:英	0.0785
+UW3:私	0.4231
+UW3:世	-0.2087
+UW3:省	0.0792
+UW2:行	0.0838
+UW2:最	-0.063
+UW2:調	0.101
+UW2:立	-0.0763
+UW2:朝	-0.1843
+UW2:本	-0.165
+UW2:,	-0.0829
+UW2:ッ	0.0831
+UW2:事	0.0492
+UW2:目	-0.1584
+UW2:相	-0.0242
+UW2:人	-0.0123
+UW2:東	-0.0931
+UW2:べ	0.1261
+UW2:主	-0.0861
+UW2:ま	0.06
+UW2:太	-0.0483
+UW2:ひ	-0.1273
+UW2:天	-0.0865
+UW2:強	0.1067
+UW2:開	0.1758
+UW2:に	-0.1764
+UW2:な	0.1063
+UW2:ど	0.1273
+UW2:と	-0.0981
+UW2:は	-0.0409
+UW2:の	0.013
+UW2:間	-0.1257
+UW2:入	0.0548
+UW2:だ	0.1837
+UW2:で	-0.0268
+UW2:て	-0.0291
+UW2:つ	-0.0949
+UW2:せ	0.03
+UW2:す	-0.0675
+UW2:た	0.0188
+UW2:そ	-0.1011
+UW2:こ	0.1141
+UW2:世	-0.0302
+UW2:し	0.1529
+UW2:ざ	0.054
+UW2:さ	0.0878
+UW2:か	0.1454
+UW2:お	-0.0502
+UW2:不	-0.215
+UW2:く	-0.0412
+UW2:三	-0.0758
+UW2:が	-0.0856
+UW2:あ	-0.0538
+UW2:う	0.0134
+UW2:い	0.0505
+UW2:政	0.1522
+UW2:区	-0.0422
+UW2:自	-0.1353
+UW2:揺	-0.1033
+UW2:大	-0.1769
+UW2:理	0.0752
+UW2:「	-0.0645
+UW2:」	0.3145
+UW2:次	-0.2378
+UW2:、	-0.0829
+UW2:発	0.0529
+UW2:〇	0.0892
+UW2:実	0.1023
+UW2:西	-0.0744
+UW2:込	0.3041
+UW2:日	-0.1815
+UW2:見	-0.3874
+UW2:子	-0.1519
+UW2:新	-0.1682
+UW2:学	0.076
+UW2:保	0.0362
+UW2:文	-0.1355
+UW2:中	-0.0968
+UW2:手	-0.1519
+UW2:米	0.0509
+UW2:も	-0.1263
+UW2:や	-0.0402
+UW2:り	-0.0579
+UW2:る	-0.0694
+UW2:よ	0.1639
+UW2:れ	0.0571
+UW2:を	-0.2516
+UW2:ん	0.2095
+UW2:気	-0.174
+UW2:民	-0.018
+UW2:副	-0.1566
+UW2:ア	-0.0587
+UW2:ｱ	-0.0587
+UW2:果	-0.0665
+UW2:ｷ	0.0568
+UW2:ｶ	0.0306
+UW2:カ	0.0306
+UW2:キ	0.0568
+UW2:｣	0.3145
+UW2:｢	-0.0645
+UW2:年	-0.106
+UW2:ｯ	0.0831
+UW2:市	-0.0813
+UW2:議	0.1198
+UW2:小	-0.2009
+UW2:第	0.081
+UW2:初	-0.3025
+UW2:北	-0.3414
+UW2:明	-0.1462
+UW2:県	-0.1165
+UW2:会	0.0978
+TC4:IOO	0.0054
+TC4:HIH	0.0804
+TC4:HII	0.0679
+TC4:IIO	0.0656
+TC4:III	0.1497
+TC4:IIH	0.0321
+TC4:IHO	-0.2324
+TC4:MOM	0.0841
+TC4:MHH	-0.0405
+TC4:MHI	0.0201
+TC4:HOH	0.0446
+TC4:KAK	0.4845
+TC4:HHO	0.0669
+TC4:MMM	0.0661
+TC4:IHH	0.0695
+TC4:MMH	-0.0241
+TC4:KKK	0.3065
+TC4:HHK	0.0365
+TC4:HHI	0.1344
+TC4:HHH	-0.0203
+TC4:KKA	0.3386
+TC4:HHN	0.0182
+TC4:HHM	-0.0122
+TQ3:BIIH	-0.0116
+TQ3:BIII	-0.0105
+TQ3:OKHH	0.0587
+TQ3:OIIH	0.1344
+TQ3:BHII	-0.0504
+TQ3:BHIH	0.0222
+TQ3:OOHH	0.011
+TQ3:OKAK	0.2792
+TQ3:BHHH	0.0478
+TQ3:BOMH	0.062
+TQ3:BHHM	-0.1073
+TQ3:OIHH	0.0623
+TQ3:BMHM	-0.0464
+TQ3:OOII	-0.0685
+TQ3:OKKA	0.0679
+TQ3:BMHI	-0.0863
+TQ3:OHHI	0.1729
+TQ3:OHHH	0.0346
+TQ3:OHMH	0.0481
+TQ3:OHII	0.0997
+TC2:OII	-0.2649
+TC2:HMM	-0.1154
+TC2:IHI	-0.1965
+TC2:KKH	0.0703
+TC2:HII	-0.1023
+TC2:HHO	0.2088
+TC3:KOK	-0.1009
+TC3:AAA	-0.0294
+TC3:NNO	0.0662
+TC3:OHO	-0.3393
+TC3:NNH	-0.1689
+TC3:KHH	-0.1216
+TC3:IOI	-0.0542
+TC3:IIM	-0.1035
+TC3:HII	-0.1088
+TC3:HIK	0.0731
+TC3:IIH	-0.0825
+TC3:IHO	-0.1935
+TC3:MHO	0.0123
+TC3:MHM	-0.0457
+TC3:MHH	-0.2694
+TC3:HOH	-0.1486
+TC3:KKH	-0.1217
+TC3:IHH	0.0128
+TC3:IHI	-0.3041
+TC3:MMH	-0.0471
+TC3:HHI	-0.0341
+TC3:HHH	0.0346
+TC3:KKA	0.0491
+UW5:月	-0.4353
+UW5:ン	-0.0343
+UW5:ル	0.0451
+UW5:挙	0.1618
+UW5:語	-0.1073
+UW5:,	0.0465
+UW5:者	-0.2233
+UW5:務	0.3519
+UW5:Ｅ２	-3.2768
+UW5:員	0.2104
+UW5:郎	-0.0368
+UW5:京	0.0722
+UW5:相	0.1319
+UW5:統	0.1955
+UW5:い	0.0331
+UW5:べ	0.1001
+UW5:み	0.0502
+UW5:大	-0.1296
+UW5:日	0.0218
+UW5:に	-0.1224
+UW5:な	-0.0787
+UW5:ど	0.1682
+UW5:と	-0.0127
+UW5:は	-0.0578
+UW5:の	-0.0635
+UW5:間	0.1191
+UW5:っ	0.0052
+UW5:ち	0.1093
+UW5:だ	-0.1186
+UW5:で	-0.085
+UW5:て	-0.0018
+UW5:つ	0.0921
+UW5:す	-0.0852
+UW5:党	-0.0654
+UW5:研	-0.0997
+UW5:げ	-0.0983
+UW5:し	-0.1371
+UW5:空	-0.0813
+UW5:さ	-0.1537
+UW5:か	0.0647
+UW5:お	0.0527
+UW5:え	0.1199
+UW5:く	0.0312
+UW5:ぎ	0.1971
+UW5:き	0.1624
+UW5:が	-0.0421
+UW5:あ	0.1655
+UW5:う	-0.0503
+UW5:E2	-3.2768
+UW5:表	0.0663
+UW5:区	-0.0901
+UW5:「	0.0363
+UW5:館	-0.0689
+UW5:、	0.0465
+UW5:。	-0.0299
+UW5:長	0.0786
+UW5:査	0.0932
+UW5:題	0.2368
+UW5:思	0.0872
+UW5:機	-0.1508
+UW5:定	0.1785
+UW5:.	-0.0299
+UW5:格	0.1356
+UW5:氏	-0.1347
+UW5:ﾙ	0.0451
+UW5:ﾝ	-0.0343
+UW5:社	-0.0278
+UW5:新	-0.1682
+UW5:学	-0.0548
+UW5:中	-0.0871
+UW5:所	-0.0814
+UW5:ゃ	0.335
+UW5:め	0.0865
+UW5:ょ	0.0854
+UW5:り	-0.0208
+UW5:る	0.0429
+UW5:的	-0.3149
+UW5:わ	0.0419
+UW5:れ	0.0504
+UW5:を	-0.1264
+UW5:ん	0.0327
+UW5:ｲ	0.0241
+UW5:イ	0.0241
+UW5:会	-0.1153
+UW5:嵐	-0.1304
+UW5:1	-0.0514
+UW5:｢	0.0363
+UW5:年	0.1763
+UW5:１	-0.0514
+UW5:市	-0.2991
+UW5:議	0.1219
+UW5:田	0.024
+UW5:選	-0.1018
+UW5:町	-0.3912
+UW5:]	-0.2762
+UW5:席	0.0921
+UW5:告	0.0848
+UW5:県	-0.4003
+UW5:省	-0.1052
+TC1:AAA	0.1093
+TC1:HOM	-0.0331
+TC1:HOH	-0.039
+TC1:OOI	-0.1832
+TC1:IOM	0.0467
+TC1:IHI	0.1169
+TC1:MMH	0.0187
+TC1:IOI	-0.1015
+TC1:IOH	-0.0142
+TC1:HII	0.0998
+TC1:HHH	0.1029
+TC1:HHM	0.058
+UC4:A	-0.2643
+UC4:I	-0.1032
+UC4:H	0.1809
+UC4:K	-0.345
+UC4:M	0.3565
+UC4:O	0.6646
+UC4:N	0.3876
+UQ2:OK	0.1759
+UQ2:BH	0.0216
+UQ2:BI	0.0113
+UW4:ー	-1.187
+UW4:行	-0.0792
+UW4:規	0.0792
+UW4:・	-0.4371
+UW4:園	-0.12
+UW4:ン	-0.3637
+UW4:ラ	-0.0881
+UW4:ル	-0.0856
+UW4:リ	-0.0541
+UW4:メ	-0.1635
+UW4:ぎ	-0.3821
+UW4:地	0.0866
+UW4:ト	-0.0403
+UW4:庁	-0.4556
+UW4:ッ	-0.0724
+UW4:率	0.0672
+UW4:予	0.0782
+UW4:事	-0.019
+UW4:井	-0.1768
+UW4:員	-0.091
+UW4:郎	-0.4866
+UW4:塁	-0.2094
+UW4:署	0.0749
+UW4:来	-0.0442
+UW4:力	-0.0302
+UW4:い	-0.3435
+UW4:賞	0.073
+UW4:ほ	0.1464
+UW4:べ	-0.0744
+UW4:へ	0.6665
+UW4:み	-0.2082
+UW4:ま	0.1051
+UW4:び	-0.4134
+UW4:ひ	0.4249
+UW4:ば	0.194
+UW4:共	-0.1212
+UW4:ふ	0.1345
+UW4:に	0.6499
+UW4:な	0.5433
+UW4:中	0.221
+UW4:と	0.4547
+UW4:は	0.8578
+UW4:の	0.7396
+UW4:ね	0.1413
+UW4:ぬ	0.1853
+UW4:っ	-0.5882
+UW4:ち	-0.3654
+UW4:だ	0.5408
+UW4:で	0.741
+UW4:て	0.3994
+UW4:つ	-0.1659
+UW4:せ	0.0181
+UW4:ず	0.1251
+UW4:す	-0.0731
+UW4:じ	-0.2506
+UW4:た	0.5034
+UW4:そ	0.4091
+UW4:党	-0.2006
+UW4:こ	0.2255
+UW4:げ	-0.4734
+UW4:け	-0.4376
+UW4:し	-0.0843
+UW4:さ	0.2864
+UW4:ご	0.1979
+UW4:か	0.053
+UW4:お	0.2405
+UW4:え	-0.2514
+UW4:く	-0.3788
+UW4:先	0.0601
+UW4:き	-0.4482
+UW4:が	0.6006
+UW4:あ	0.4752
+UW4:う	-0.064
+UW4:一	-0.2069
+UW4:島	-0.2056
+UW4:改	0.0787
+UW4:士	-0.1413
+UW4:政	0.2182
+UW4:区	0.4517
+UW4:野	-0.11
+UW4:支	0.0856
+UW4:系	0.0786
+UW4:館	-0.1984
+UW4:化	0.0776
+UW4:参	0.1555
+UW4:込	-0.337
+UW4:.	0.3508
+UW4:よ	0.3351
+UW4:子	-0.4802
+UW4:学	-0.1397
+UW4:感	0.0916
+UW4:校	-0.036
+UW4:般	-0.0852
+UW4:内	0.0584
+UW4:円	0.0788
+UW4:題	-0.0792
+UW4:高	0.212
+UW4:約	0.2171
+UW4:的	0.2586
+UW4:銀	-0.2213
+UW4:屋	-0.1328
+UW4:済	-0.0543
+UW4:ｰ	-1.187
+UW4:輪	-0.1433
+UW4:山	-0.15
+UW4:ｺ	0.1789
+UW4:ｾ	0.1287
+UW4:｣	0.3798
+UW4:｢	0.1895
+UW4:際	-0.2604
+UW4:･	-0.4371
+UW4:ｯ	-0.0724
+UW4:産	-0.1101
+UW4:市	0.2771
+UW4:能	-0.073
+UW4:田	-0.29
+UW4:選	0.2596
+UW4:町	0.1826
+UW4:間	-0.2344
+UW4:ｶ	0.2145
+UW4:体	-0.1286
+UW4:初	0.1347
+UW4:作	0.053
+UW4:カ	0.2145
+UW4:寺	-0.0809
+UW4:側	0.4292
+UW4:道	-0.1291
+UW4:生	-0.1286
+UW4:月	-0.9066
+UW4:都	0.1192
+UW4:最	0.0845
+UW4:立	-0.2112
+UW4:電	-0.0878
+UW4:沢	-0.0939
+UW4:業	-0.1043
+UW4:,	0.393
+UW4:者	0.2145
+UW4:教	0.0704
+UW4:務	-0.2715
+UW4:動	-0.074
+UW4:車	-0.1481
+UW4:回	0.15
+UW4:軍	0.1158
+UW4:経	0.1146
+UW4:国	-0.0619
+UW4:目	0.0922
+UW4:統	-0.1169
+UW4:大	0.0571
+UW4:日	0.1798
+UW4:谷	-0.1
+UW4:空	-0.0867
+UW4:協	0.1013
+UW4:多	0.1067
+UW4:領	-0.1659
+UW4:物	-0.0735
+UW4:人	0.1036
+UW4:〓	-0.5156
+UW4:球	-0.1267
+UW4:「	0.1895
+UW4:」	0.3798
+UW4:、	0.393
+UW4:。	0.3508
+UW4:長	0.0357
+UW4:〇	0.4999
+UW4:川	-0.2667
+UW4:定	-0.1057
+UW4:性	0.0553
+UW4:合	-0.1834
+UW4:後	0.0456
+UW4:時	0.1829
+UW4:首	0.1749
+UW4:ﾙ	-0.0856
+UW4:近	0.0929
+UW4:ﾒ	-0.1635
+UW4:ﾗ	-0.0881
+UW4:方	-0.0856
+UW4:―	-0.4841
+UW4:ﾄ	-0.0403
+UW4:文	0.0522
+UW4:所	-0.1566
+UW4:米	0.2937
+UW4:も	0.4169
+UW4:ゃ	-0.2666
+UW4:む	-0.0882
+UW4:め	-0.5046
+UW4:ょ	-0.1544
+UW4:や	0.2795
+UW4:院	-0.2297
+UW4:り	-0.9726
+UW4:る	-1.4896
+UW4:氏	0.5388
+UW4:ら	-0.2922
+UW4:わ	-0.1783
+UW4:れ	-0.2613
+UW4:ろ	-0.457
+UW4:を	1.315
+UW4:ん	-0.2352
+UW4:気	-0.091
+UW4:民	-0.2716
+UW4:場	-0.141
+UW4:ﾘ	-0.0541
+UW4:副	0.3879
+UW4:以	0.0544
+UW4:会	0.095
+UW4:ﾝ	-0.3637
+UW4:コ	0.1789
+UW4:年	0.0374
+UW4:和	-0.0681
+UW4:セ	0.1287
+UW4:前	0.1623
+UW4:器	-0.0851
+UW4:総	0.094
+UW4:議	-0.0244
+UW4:小	0.191
+UW4:警	-0.1184
+UW4:線	-0.0994
+UW4:第	0.0788
+UW4:県	0.2997
+UW4:木	-0.0485
+UW4:省	-0.3485
+UQ3:ON	-0.3212
+UQ3:BA	-0.0479
+UQ3:OI	-0.0827
+UQ3:BM	0.316
+UQ3:BN	0.6427
+UQ3:BO	1.4761
+UQ3:BH	0.0042
+UQ3:BI	0.1913
+UQ3:BK	-0.7198
+TQ1:OIHI	0.02
+TQ1:OIIH	-0.0068
+TQ1:BIII	0.1595
+TQ1:OAKK	0.0482
+TQ1:BIHH	0.006
+TQ1:BHIH	-0.0132
+TQ1:BHHH	-0.0227
+TQ1:BHHI	0.0316
+TQ1:BOHH	0.0225
+TQ1:BOOO	-0.0908
+TQ1:OHHH	0.0281
+TQ1:BNHH	-0.0744
+TQ1:OHIH	0.0249
+UC5:I	-0.1238
+UC5:H	0.0313
+UC5:K	-0.0799
+UC5:M	0.0539
+UC5:O	-0.0831
+TQ4:BIIH	-0.0607
+TQ4:BIII	-0.2181
+TQ4:OAKK	0.018
+TQ4:OIIH	0.0626
+TQ4:BHII	-0.0966
+TQ4:OIHI	-0.0493
+TQ4:BHHH	-0.0721
+TQ4:OIII	-0.4007
+TQ4:BHHM	-0.3604
+TQ4:OIHH	0.1935
+TQ4:OHIH	-0.1573
+TQ4:OKAK	-0.8156
+TQ4:OHHI	0.2446
+TQ4:OHHH	-0.0294
+TQ4:OAAA	-0.2763
+TQ4:OHHO	0.048
+TW2:その後	-0.443
+TW2:社会党	-0.3216
+TW2:もので	0.1882
+TW2:ていた	0.1833
+TW2:大きな	-0.1255
+TW2:ころが	-0.2434
+TW2:同時に	-0.8097
+TW2:一気に	-0.0792
+TW2:ともに	-0.4517
+TW2:だって	-0.1049
+TW2:対して	-0.2721
+TW2:として	-0.4657
+TW2:いった	-0.1256
+TW2:ある程	-0.2049
+TW2:初めて	-0.1512
+TW2:しょう	0.3873
+TW1:東京都	0.2026
+TW1:につい	-0.4681
+UW1:も	-0.0466
+UW1:主	-0.0402
+UW1:大	0.0561
+UW1:や	-0.047
+UW1:･	-0.0135
+UW1:り	0.0208
+UW1:日	-0.0141
+UW1:よ	0.0182
+UW1:ら	-0.0292
+UW1:区	-0.0912
+UW1:れ	0.0169
+UW1:京	-0.0268
+UW1:に	-0.0789
+UW1:ん	-0.0137
+UW1:ど	-0.0123
+UW1:と	-0.0547
+UW1:は	-0.0847
+UW1:の	-0.0185
+UW1:都	-0.0718
+UW1:あ	-0.0941
+UW1:市	-0.0411
+UW1:委	0.0729
+UW1:で	-0.0201
+UW1:県	-0.0386
+UW1:を	-0.0446
+UW1:国	-0.046
+UW1:・	-0.0135
+UW1:こ	0.0505
+UW1:理	0.0361
+UW1:午	0.0871
+UW1:,	0.0156
+UW1:｢	-0.0463
+UW1:「	-0.0463
+UW1:き	0.0121
+UW1:が	-0.0553
+UW1:、	0.0156
+UW1:う	-0.0127
+UW1:生	-0.0408
+UP3:B	0.0189
+BP1:OO	-0.0125
+BP1:OB	0.0304
+BP1:BB	0.0295
+BP1:UB	0.0352
+TW3:いただ	-0.1734
+TW3:してい	0.1314
+TW3:十二月	-0.2287
+TW3:れから	-0.3752
+TW3:のもの	-0.06
+TW3:にとっ	-0.5989
+TW3:に当た	-0.6247
+TW3:ので、	-0.0727
+TW3:ので,	-0.0727
+TW3:につい	-0.5483
+TW3:として	-0.4314
+BQ4:BMI	-0.3385
+BQ4:OAH	0.0926
+BQ4:BOO	-1.2396
+BQ4:OHH	0.0266
+BQ4:BHH	-0.3895
+BQ4:ONN	-0.0973
+BQ4:BIK	0.1348
+BQ4:BIH	0.3761
+BQ4:BII	-0.4654
+BQ4:OHK	-0.2036
+BQ4:BKK	-0.1806
+BP2:OO	-0.1762
+BP2:BO	0.006
+BQ2:BHI	-0.1159
+BQ2:BHH	0.0118
+BQ2:UHI	-0.1146
+BQ2:BHM	0.0466
+BQ2:BIH	-0.0919
+BQ2:OHM	-0.0181
+BQ2:OHH	-0.1139
+BQ2:BKO	0.0864
+BQ2:OIH	0.0153
+BQ2:BKK	-0.172
+BQ3:BHI	0.2664
+BQ3:BHH	-0.0792
+BQ3:OHM	0.0439
+BQ3:OHH	0.2174
+BQ3:OII	0.028
+BQ3:BII	-0.0299
+BQ3:BMH	0.0937
+BQ3:OMH	-0.2402
+BQ3:BKI	0.0419
+BQ3:BMM	0.8335
+BQ3:BOH	0.0775
+BQ3:BNN	0.0998
+BQ3:OKI	-0.0793
+BQ3:OKH	0.1798
+BQ3:OOO	1.1699
+BQ3:OKO	-0.2242
+TQ2:BIII	-0.1033
+TQ2:BIHH	-0.1401
+TQ2:BKAK	-0.0543
+TQ2:BOOO	-0.5591
+BQ1:BOH	-0.0091
+BQ1:BNH	0.0449
+BQ1:BOO	-0.2597
+BQ1:BHH	0.115
+BQ1:BIM	0.0886
+BQ1:BHM	0.1521
+BQ1:OHI	0.0451
+BQ1:BII	-0.1158
+BQ1:BMH	0.1208
+BQ1:OIH	-0.0296
+BQ1:OKA	0.1851
+BQ1:OKH	-0.102
+BQ1:OKK	0.0904
+BQ1:OOO	0.2965
+UQ1:OO	-0.2422
+UQ1:OK	0.041
+UQ1:OI	0.0477
+UQ1:OH	-0.0095
+UQ1:BN	0.0142
+UQ1:BO	-0.0056
+UQ1:BH	0.0021
+UQ1:BI	-0.0012
+UQ1:BK	-0.0099
+UC2:A	0.0819
+UC2:I	0.0409
+UC2:H	0.1059
+UC2:M	0.3987
+UC2:O	0.0646
+UC2:N	0.5775
+UP1:O	-0.0214
+UP2:B	0.0069
+UP2:O	0.0935
+BC1:II	0.2461
+BC1:HH	0.0006
+BC1:KH	0.0406
+BC1:OH	-0.1378
+BC2:AA	-0.3267
+BC2:OO	-0.292
+BC2:AI	0.2744
+BC2:KI	0.3831
+BC2:IK	0.1721
+BC2:MK	0.3334
+BC2:AN	-0.0878
+BC2:II	-0.1332
+BC2:IH	-0.1184
+BC2:HH	-0.407
+BC2:MH	-0.3132
+BC2:HN	0.4012
+BC2:HO	0.3761
+BC2:IO	0.5492
+BC2:HM	-0.1711
+BC2:IA	0.1327
+BC2:KK	-0.8741
+BC3:HK	-0.0721
+BC3:HH	0.0996
+BC3:HI	0.0626
+BC3:HN	-0.1307
+BC3:HO	-0.0836
+BC3:IH	-0.0301
+BC3:KK	0.2762
+BC3:OH	0.0266
+BC3:OA	-0.1652
+BC3:MM	0.4034
+BC3:MK	0.1079
diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 0000000..f1add26
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1,80 @@
+max_width = 100
+hard_tabs = false
+tab_spaces = 4
+newline_style = "Auto"
+# indent_style = "Block"
+use_small_heuristics = "Default"
+fn_call_width = 80
+attr_fn_like_width = 70
+struct_lit_width = 18
+struct_variant_width = 35
+array_width = 80
+chain_width = 80
+single_line_if_else_max_width = 80
+single_line_let_else_max_width = 80
+# wrap_comments = false
+# format_code_in_doc_comments = false
+# doc_comment_code_block_width = 100
+# comment_width = 80
+# normalize_comments = false
+# normalize_doc_attributes = false
+# format_strings = false
+# format_macro_matchers = false
+# format_macro_bodies = true
+# skip_macro_invocations = []
+# hex_literal_case = "Preserve"
+# empty_item_single_line = true
+# struct_lit_single_line = true
+# fn_single_line = false
+# where_single_line = false
+# imports_indent = "Block"
+# imports_layout = "Mixed"
+# imports_granularity = "Preserve"
+# group_imports = "Preserve"
+reorder_imports = true
+reorder_modules = true
+# reorder_impl_items = false
+# type_punctuation_density = "Wide"
+# space_before_colon = false
+# space_after_colon = true
+# spaces_around_ranges = false
+# binop_separator = "Front"
+remove_nested_parens = true
+# combine_control_expr = true
+short_array_element_width_threshold = 10
+# overflow_delimited_expr = false
+# struct_field_align_threshold = 0
+# enum_discrim_align_threshold = 0
+# match_arm_blocks = true
+match_arm_leading_pipes = "Never"
+# force_multiline_blocks = false
+fn_params_layout = "Tall"
+# brace_style = "SameLineWhere"
+# control_brace_style = "AlwaysSameLine"
+# trailing_semicolon = true
+# trailing_comma = "Vertical"
+match_block_trailing_comma = false
+# blank_lines_upper_bound = 1
+# blank_lines_lower_bound = 0
+edition = "2015"
+style_edition = "2015"
+# version = "One"
+# inline_attribute_width = 0
+# format_generated_files = true
+# generated_marker_line_search_limit = 5
+merge_derives = true
+use_try_shorthand = false
+use_field_init_shorthand = false
+force_explicit_abi = true
+# condense_wildcard_suffixes = false
+# color = "Auto"
+# required_version = "1.8.0"
+# unstable_features = false
+disable_all_formatting = false
+# skip_children = false
+# show_parse_errors = true
+# error_on_line_overflow = false
+# error_on_unformatted = false
+# ignore = []
+# emit_mode = "Files"
+# make_backup = false
diff --git a/src/adaboost.rs b/src/adaboost.rs
index cfa1188..e0f9f63 100644
--- a/src/adaboost.rs
+++ b/src/adaboost.rs
@@ -78,16 +78,10 @@ impl AdaBoost {
             }
             self.num_instances += 1;
             if self.num_instances % 1000 == 0 {
-                eprint!(
-                    "\rfinding instances...: {} instances found",
-                    self.num_instances
-                );
+                eprint!("\rfinding instances...: {} instances found", self.num_instances);
             }
         }
-        eprintln!(
-            "\rfinding instances...: {} instances found",
-            self.num_instances
-        );
+        eprintln!("\rfinding instances...: {} instances found", self.num_instances);
         map.insert("".to_string(), 0.0);
 
         self.features = map.keys().cloned().collect();
@@ -133,8 +127,7 @@ impl AdaBoost {
 
             let end = self.instances_buf.len();
             self.instances.push((start, end));
-            self.instance_weights
-                .push((-2.0 * label as f64 * score).exp());
+            self.instance_weights.push((-2.0 * label as f64 * score).exp());
 
             if self.instance_weights.len() % 1000 == 0 {
                 eprint!(
@@ -192,11 +185,7 @@ impl AdaBoost {
                 }
             }
 
-            eprint!(
-                "\rIteration {} - margin: {}",
-                t,
-                (0.5 - best_error_rate).abs()
-            );
+            eprint!("\rIteration {} - margin: {}", t, (0.5 - best_error_rate).abs());
             if (0.5 - best_error_rate).abs() < self.threshold {
                 break;
             }
@@ -212,11 +201,7 @@ impl AdaBoost {
                 let label = self.labels[i];
                 let (start, end) = self.instances[i];
                 let hs = &self.instances_buf[start..end];
-                let prediction = if hs.binary_search(&h_best).is_ok() {
-                    1
-                } else {
-                    -1
-                };
+                let prediction = if hs.binary_search(&h_best).is_ok() { 1 } else { -1 };
                 if label * prediction < 0 {
                     self.instance_weights[i] *= alpha_exp;
                 } else {
@@ -335,18 +320,10 @@ impl AdaBoost {
         let recall = pp as f64 / (pp + np).max(1) as f64 * 100.0;
 
         eprintln!("Result:");
-        eprintln!(
-            "Accuracy: {:.2}% ({} / {})",
-            acc,
-            pp + nn,
-            self.num_instances
-        );
+        eprintln!("Accuracy: {:.2}% ({} / {})", acc, pp + nn, self.num_instances);
         eprintln!("Precision: {:.2}% ({} / {})", prec, pp, pp + pn);
         eprintln!("Recall: {:.2}% ({} / {})", recall, pp, pp + np);
-        eprintln!(
-            "Confusion Matrix: TP: {}, FP: {}, FN: {}, TN: {}",
-            pp, pn, np, nn
-        );
+        eprintln!("Confusion Matrix: TP: {}, FP: {}, FN: {}, TN: {}", pp, pn, np, nn);
     }
 
     /// Adds a new instance to the model.
diff --git a/src/segmenter.rs b/src/segmenter.rs
index ec613a8..e479fb6 100644
--- a/src/segmenter.rs
+++ b/src/segmenter.rs
@@ -19,15 +19,38 @@ impl Segmenter {
     /// A new Segmenter instance with the specified or default AdaBoost learner.
     pub fn new(learner: Option<AdaBoost>) -> Self {
         let patterns = vec![
-            (
-                Regex::new(r"[一二三四五六七八九十百千万億兆]").unwrap(),
-                "M",
-            ),
+            // Japanese Kanji numbers
+            (Regex::new(r"[一二三四五六七八九十百千万億兆]").unwrap(), "M"),
+            // Japanese Kanji
             (Regex::new(r"[一-龠々〆ヵヶ]").unwrap(), "H"),
+            // Japanese Hiragana
             (Regex::new(r"[ぁ-ん]").unwrap(), "I"),
+            // Japanese Katakana
             (Regex::new(r"[ァ-ヴーｱ-ﾝﾞｰ]").unwrap(), "K"),
+            // Latin alphabet (ASCII + full-width)
             (Regex::new(r"[a-zA-Zａ-ｚＡ-Ｚ]").unwrap(), "A"),
+            // Numbers (ASCII + full-width)
             (Regex::new(r"[0-9０-９]").unwrap(), "N"),
+            // // Japanese Kanji numbers
+            // (Regex::new(r"[一二三四五六七八九十百千万億兆]").unwrap(), "M"),
+            // // Japanese Kanji
+            // (Regex::new(r"[一-龠々〆ヵヶ]").unwrap(), "J"),
+            // // Chinese Kanji (CJK Unified Ideographs)
+            // (Regex::new(r"[㐀-䶵一-鿿]").unwrap(), "M"),
+            // // Hangul (Korean)
+            // (Regex::new(r"[가-힣]").unwrap(), "K"),
+            // // Hiragana (Japanese)
+            // (Regex::new(r"[ぁ-ん]").unwrap(), "I"),
+            // // Katakana (Japanese)
+            // (Regex::new(r"[ァ-ヴーｱ-ﾝﾞﾟ]").unwrap(), "K"),
+            // // Latin alphabet (ASCII + full-width)
+            // (Regex::new(r"[a-zA-Zａ-ｚＡ-Ｚ]").unwrap(), "A"),
+            // // Numbers (ASCII + full-width)
+            // (Regex::new(r"[0-9０-９]").unwrap(), "N"),
+            // // Vietnamese Extended Latin
+            // (Regex::new(r"[À-ſ]").unwrap(), "V"),
+            // // Thai script
+            // (Regex::new(r"[ก-๛]").unwrap(), "T"),
         ];
 
         Segmenter {
@@ -279,9 +302,8 @@ mod tests {
 
     #[test]
     fn test_segmenter() {
-        let model_file = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
-            .join("./resources")
-            .join("RWCP.model");
+        let model_file =
+            PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("./resources").join("RWCP.model");
 
         let mut learner = AdaBoost::new(0.01, 100, 1);
         learner.load_model(model_file.as_path()).unwrap();

From 6ec56ca2e062fa60c40230c3a994e39fe82ee370 Mon Sep 17 00:00:00 2001
From: Minoru OSUKA <minoru.osuka@gmail.com>
Date: Tue, 3 Jun 2025 22:04:24 +0900
Subject: [PATCH 05/15] Support multi languages (#5)

* Support multi languages

* Fix document
---
 src/segmenter.rs | 48 +++++++++++++++++++-----------------------------
 1 file changed, 19 insertions(+), 29 deletions(-)

diff --git a/src/segmenter.rs b/src/segmenter.rs
index e479fb6..cf0e117 100644
--- a/src/segmenter.rs
+++ b/src/segmenter.rs
@@ -19,38 +19,26 @@ impl Segmenter {
     /// A new Segmenter instance with the specified or default AdaBoost learner.
     pub fn new(learner: Option<AdaBoost>) -> Self {
         let patterns = vec![
+            // Numbers
+            (Regex::new(r"[0-9０-９]").unwrap(), "N"),
             // Japanese Kanji numbers
             (Regex::new(r"[一二三四五六七八九十百千万億兆]").unwrap(), "M"),
-            // Japanese Kanji
-            (Regex::new(r"[一-龠々〆ヵヶ]").unwrap(), "H"),
-            // Japanese Hiragana
+            // Hiragana (Japanese)
             (Regex::new(r"[ぁ-ん]").unwrap(), "I"),
-            // Japanese Katakana
-            (Regex::new(r"[ァ-ヴーｱ-ﾝﾞｰ]").unwrap(), "K"),
-            // Latin alphabet (ASCII + full-width)
+            // Katakana (Japanese)
+            (Regex::new(r"[ァ-ヴーｱ-ﾝﾞﾟ]").unwrap(), "K"),
+            // Hangul (Korean)
+            (Regex::new(r"[가-힣]").unwrap(), "G"),
+            // Thai script
+            (Regex::new(r"[ก-๛]").unwrap(), "T"),
+            // Kanji (Japanese)
+            (Regex::new(r"[一-龠々〆ヵヶ]").unwrap(), "H"),
+            // Kanji (CJK Unified Ideographs)
+            (Regex::new(r"[㐀-䶵一-鿿]").unwrap(), "Z"),
+            // Extended Latin (Vietnamese, etc.)
+            (Regex::new(r"[À-ÿĀ-ſƀ-ƿǍ-ɏ]").unwrap(), "E"),
+            // ASCII + Full-width Latin
             (Regex::new(r"[a-zA-Zａ-ｚＡ-Ｚ]").unwrap(), "A"),
-            // Numbers (ASCII + full-width)
-            (Regex::new(r"[0-9０-９]").unwrap(), "N"),
-            // // Japanese Kanji numbers
-            // (Regex::new(r"[一二三四五六七八九十百千万億兆]").unwrap(), "M"),
-            // // Japanese Kanji
-            // (Regex::new(r"[一-龠々〆ヵヶ]").unwrap(), "J"),
-            // // Chinese Kanji (CJK Unified Ideographs)
-            // (Regex::new(r"[㐀-䶵一-鿿]").unwrap(), "M"),
-            // // Hangul (Korean)
-            // (Regex::new(r"[가-힣]").unwrap(), "K"),
-            // // Hiragana (Japanese)
-            // (Regex::new(r"[ぁ-ん]").unwrap(), "I"),
-            // // Katakana (Japanese)
-            // (Regex::new(r"[ァ-ヴーｱ-ﾝﾞﾟ]").unwrap(), "K"),
-            // // Latin alphabet (ASCII + full-width)
-            // (Regex::new(r"[a-zA-Zａ-ｚＡ-Ｚ]").unwrap(), "A"),
-            // // Numbers (ASCII + full-width)
-            // (Regex::new(r"[0-9０-９]").unwrap(), "N"),
-            // // Vietnamese Extended Latin
-            // (Regex::new(r"[À-ſ]").unwrap(), "V"),
-            // // Thai script
-            // (Regex::new(r"[ก-๛]").unwrap(), "T"),
         ];
 
         Segmenter {
@@ -65,7 +53,9 @@ impl Segmenter {
     /// * `ch` - A string slice representing a single character.
     ///
     /// # Returns
-    /// A static string representing the type of the character, such as "M", "H", "I", "K", "A", "N", or "O" (for others).
+    /// A string slice representing the type of the character, such as "N" for number,
+    /// "I" for Hiragana, "K" for Katakana, etc. If the character does not match any pattern,
+    /// it returns "O" for Other.
     pub fn get_type(&self, ch: &str) -> &str {
         for (pattern, label) in &self.patterns {
             if pattern.is_match(ch) {

From ba89b7420ecc21a0180781bebd904d133c865f1c Mon Sep 17 00:00:00 2001
From: Minoru OSUKA <minoru.osuka@gmail.com>
Date: Wed, 4 Jun 2025 00:11:03 +0900
Subject: [PATCH 06/15] Refactoring (#6)

---
 src/adaboost.rs | 136 ++++++++++++++++++++++++++++++------------------
 src/main.rs     |  32 ++++++++++--
 src/trainer.rs  |   7 ++-
 3 files changed, 117 insertions(+), 58 deletions(-)

diff --git a/src/adaboost.rs b/src/adaboost.rs
index e0f9f63..2272343 100644
--- a/src/adaboost.rs
+++ b/src/adaboost.rs
@@ -7,6 +7,26 @@ use std::sync::Arc;
 
 type Label = i8;
 
+/// Structure to hold evaluation metrics.
+pub struct Metrics {
+    /// Accuracy in percentage (%)
+    pub accuracy: f64,
+    /// Precision in percentage (%)
+    pub precision: f64,
+    /// Recall in percentage (%)
+    pub recall: f64,
+    /// Number of instances in the dataset
+    pub num_instances: usize,
+    /// True Positives count
+    pub true_positives: usize,
+    /// False Positives count
+    pub false_positives: usize,
+    /// False Negatives count
+    pub false_negatives: usize,
+    /// True Negatives count
+    pub true_negatives: usize,
+}
+
 /// AdaBoost implementation for binary classification
 /// This implementation uses a simple feature extraction method
 /// and is designed for educational purposes.
@@ -72,16 +92,20 @@ impl AdaBoost {
             let line = line?;
             let mut parts = line.split_whitespace();
             let _label = parts.next();
+
             for h in parts {
                 map.entry(h.to_string()).or_insert(0.0);
                 buf_size += 1;
             }
+
             self.num_instances += 1;
             if self.num_instances % 1000 == 0 {
                 eprint!("\rfinding instances...: {} instances found", self.num_instances);
             }
         }
+
         eprintln!("\rfinding instances...: {} instances found", self.num_instances);
+
         map.insert("".to_string(), 0.0);
 
         self.features = map.keys().cloned().collect();
@@ -128,7 +152,6 @@ impl AdaBoost {
             let end = self.instances_buf.len();
             self.instances.push((start, end));
             self.instance_weights.push((-2.0 * label as f64 * score).exp());
-
             if self.instance_weights.len() % 1000 == 0 {
                 eprint!(
                     "\rloading instances...: {}/{} instances loaded",
@@ -137,7 +160,13 @@ impl AdaBoost {
                 );
             }
         }
-        eprintln!();
+
+        eprintln!(
+            "\rloading instances...: {}/{} instances loaded",
+            self.instance_weights.len(),
+            self.num_instances
+        );
+
         Ok(())
     }
 
@@ -277,55 +306,6 @@ impl AdaBoost {
         Ok(())
     }
 
-    /// Gets the bias term of the model.
-    /// The bias is calculated as the negative sum of the model weights divided by 2.
-    ///
-    /// # Returns:The bias term as a `f64`.
-    pub fn get_bias(&self) -> f64 {
-        -self.model.iter().sum::<f64>() / 2.0
-    }
-
-    /// Displays the result of the model's performance on the training data.
-    /// It calculates accuracy, precision, recall, and confusion matrix.
-    pub fn show_result(&self) {
-        let bias = self.get_bias();
-        let mut pp = 0;
-        let mut pn = 0;
-        let mut np = 0;
-        let mut nn = 0;
-
-        for i in 0..self.num_instances {
-            let label = self.labels[i];
-            let (start, end) = self.instances[i];
-            let mut score = bias;
-            for &h in &self.instances_buf[start..end] {
-                score += self.model[h];
-            }
-
-            if score >= 0.0 {
-                if label > 0 {
-                    pp += 1
-                } else {
-                    pn += 1
-                }
-            } else if label > 0 {
-                np += 1
-            } else {
-                nn += 1
-            }
-        }
-
-        let acc = (pp + nn) as f64 / self.num_instances as f64 * 100.0;
-        let prec = pp as f64 / (pp + pn).max(1) as f64 * 100.0;
-        let recall = pp as f64 / (pp + np).max(1) as f64 * 100.0;
-
-        eprintln!("Result:");
-        eprintln!("Accuracy: {:.2}% ({} / {})", acc, pp + nn, self.num_instances);
-        eprintln!("Precision: {:.2}% ({} / {})", prec, pp, pp + pn);
-        eprintln!("Recall: {:.2}% ({} / {})", recall, pp, pp + np);
-        eprintln!("Confusion Matrix: TP: {}, FP: {}, FN: {}, TN: {}", pp, pn, np, nn);
-    }
-
     /// Adds a new instance to the model.
     /// The instance is represented by a set of attributes and a label.
     ///
@@ -372,4 +352,58 @@ impl AdaBoost {
             -1
         }
     }
+
+    /// Gets the bias term of the model.
+    /// The bias is calculated as the negative sum of the model weights divided by 2.
+    ///
+    /// # Returns: The bias term as a `f64`.
+    pub fn get_bias(&self) -> f64 {
+        -self.model.iter().sum::<f64>() / 2.0
+    }
+
+    /// Calculates and returns the performance metrics of the model on the training data.
+    pub fn get_metrics(&self) -> Metrics {
+        let bias = self.get_bias();
+        let mut true_positives = 0; // true positives
+        let mut false_positives = 0; // false positives
+        let mut false_negatives = 0; // false negatives
+        let mut true_negatives = 0; // true negatives
+
+        for i in 0..self.num_instances {
+            let label = self.labels[i];
+            let (start, end) = self.instances[i];
+            let mut score = bias;
+            for &h in &self.instances_buf[start..end] {
+                score += self.model[h];
+            }
+            if score >= 0.0 {
+                if label > 0 {
+                    true_positives += 1;
+                } else {
+                    false_positives += 1;
+                }
+            } else if label > 0 {
+                false_negatives += 1;
+            } else {
+                true_negatives += 1;
+            }
+        }
+
+        let accuracy = (true_positives + true_negatives) as f64 / self.num_instances as f64 * 100.0;
+        let precision =
+            true_positives as f64 / (true_positives + false_positives).max(1) as f64 * 100.0;
+        let recall =
+            true_positives as f64 / (true_positives + false_negatives).max(1) as f64 * 100.0;
+
+        Metrics {
+            accuracy,
+            precision,
+            recall,
+            num_instances: self.num_instances,
+            true_positives,
+            false_positives,
+            false_negatives,
+            true_negatives,
+        }
+    }
 }
diff --git a/src/main.rs b/src/main.rs
index e31f697..ff85dde 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -92,7 +92,7 @@ fn extract(args: ExtractArgs) -> Result<(), Box<dyn Error>> {
 
     extractor.extract(args.corpus_file.as_path(), args.features_file.as_path())?;
 
-    println!("Feature extraction completed successfully.");
+    eprintln!("Feature extraction completed successfully.");
     Ok(())
 }
 
@@ -129,9 +129,35 @@ fn train(args: TrainArgs) -> Result<(), Box<dyn Error>> {
         trainer.load_model(model_path.as_path())?;
     }
 
-    trainer.train(running, args.model_file.as_path())?;
+    let metrics = trainer.train(running, args.model_file.as_path())?;
+
+    eprintln!("Result Metrics:");
+    eprintln!(
+        "  Accuracy: {:.2}% ( {} / {} )",
+        metrics.accuracy,
+        metrics.true_positives + metrics.true_negatives,
+        metrics.num_instances
+    );
+    eprintln!(
+        "  Precision: {:.2}% ( {} / {} )",
+        metrics.precision,
+        metrics.true_positives,
+        metrics.true_positives + metrics.false_positives
+    );
+    eprintln!(
+        "  Recall: {:.2}% ( {} / {} )",
+        metrics.recall,
+        metrics.true_positives,
+        metrics.true_positives + metrics.false_negatives
+    );
+    eprintln!(
+        "  Confusion Matrix:\n    True Positives: {}\n    False Positives: {}\n    False Negatives: {}\n    True Negatives: {}",
+        metrics.true_positives,
+        metrics.false_positives,
+        metrics.false_negatives,
+        metrics.true_negatives
+    );
 
-    println!("Training completed successfully.");
     Ok(())
 }
 
diff --git a/src/trainer.rs b/src/trainer.rs
index dd76a94..1bce18d 100644
--- a/src/trainer.rs
+++ b/src/trainer.rs
@@ -2,7 +2,7 @@ use std::path::Path;
 use std::sync::atomic::AtomicBool;
 use std::sync::Arc;
 
-use crate::adaboost::AdaBoost;
+use crate::adaboost::{AdaBoost, Metrics};
 
 /// Trainer struct for managing the AdaBoost training process.
 /// It initializes the AdaBoost learner with the specified parameters,
@@ -74,13 +74,12 @@ impl Trainer {
         &mut self,
         running: Arc<AtomicBool>,
         model_path: &Path,
-    ) -> Result<(), Box<dyn std::error::Error>> {
+    ) -> Result<Metrics, Box<dyn std::error::Error>> {
         self.learner.train(running.clone());
 
         // Save the trained model to the specified file
         self.learner.save_model(model_path)?;
-        self.learner.show_result();
 
-        Ok(())
+        Ok(self.learner.get_metrics())
     }
 }

From 41438e854c9d3da5284e43682c2e53a5212eb6ce Mon Sep 17 00:00:00 2001
From: Minoru OSUKA <minoru.osuka@gmail.com>
Date: Wed, 4 Jun 2025 17:08:45 +0900
Subject: [PATCH 07/15] Add tests (#7)

---
 src/segmenter.rs | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/segmenter.rs b/src/segmenter.rs
index cf0e117..871dde4 100644
--- a/src/segmenter.rs
+++ b/src/segmenter.rs
@@ -304,6 +304,11 @@ mod tests {
         let result = segmenter.parse(sentence);
         assert!(!result.is_empty());
         assert_eq!(result.len(), 5); // Adjust based on expected segmentation
+        assert_eq!(result[0], "これ");
+        assert_eq!(result[1], "は");
+        assert_eq!(result[2], "テスト");
+        assert_eq!(result[3], "です");
+        assert_eq!(result[4], "。");
     }
 
     #[test]

From 7fc5c5c42344e2731c122e31d73d6d16f29311f3 Mon Sep 17 00:00:00 2001
From: Minoru OSUKA <minoru.osuka@gmail.com>
Date: Wed, 4 Jun 2025 17:59:48 +0900
Subject: [PATCH 08/15] Rename function (#8)

---
 src/main.rs      | 2 +-
 src/segmenter.rs | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index ff85dde..06d9692 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -186,7 +186,7 @@ fn segment(args: SegmentArgs) -> Result<(), Box<dyn Error>> {
         if line.is_empty() {
             continue;
         }
-        let tokens = segmenter.parse(line);
+        let tokens = segmenter.segment(line);
         writeln!(writer, "{}", tokens.join(" "))?;
     }
 
diff --git a/src/segmenter.rs b/src/segmenter.rs
index 871dde4..7d4b99d 100644
--- a/src/segmenter.rs
+++ b/src/segmenter.rs
@@ -160,14 +160,14 @@ impl Segmenter {
         }
     }
 
-    /// Parses a sentence and segments it into words.
+    /// Segments a sentence into words.
     ///
     /// # Arguments
     /// * `sentence` - A string slice representing the sentence to be parsed.
     ///
     /// # Returns
     /// A vector of strings, where each string is a segmented word from the sentence.
-    pub fn parse(&self, sentence: &str) -> Vec<String> {
+    pub fn segment(&self, sentence: &str) -> Vec<String> {
         if sentence.is_empty() {
             return Vec::new();
         }
@@ -301,7 +301,7 @@ mod tests {
         let mut segmenter = Segmenter::new(Some(learner));
         let sentence = "これはテストです。";
         segmenter.add_sentence(sentence);
-        let result = segmenter.parse(sentence);
+        let result = segmenter.segment(sentence);
         assert!(!result.is_empty());
         assert_eq!(result.len(), 5); // Adjust based on expected segmentation
         assert_eq!(result[0], "これ");

From a55470806389aabd4de0298e2c9c3863b4d9de65 Mon Sep 17 00:00:00 2001
From: Minoru Osuka <minoru.osuka@gmail.com>
Date: Wed, 4 Jun 2025 21:11:43 +0900
Subject: [PATCH 09/15] Update segmenter

---
 src/segmenter.rs | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/segmenter.rs b/src/segmenter.rs
index 7d4b99d..be740c7 100644
--- a/src/segmenter.rs
+++ b/src/segmenter.rs
@@ -292,16 +292,18 @@ mod tests {
 
     #[test]
     fn test_segmenter() {
+        let sentence = "これはテストです。";
+
         let model_file =
             PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("./resources").join("RWCP.model");
-
         let mut learner = AdaBoost::new(0.01, 100, 1);
         learner.load_model(model_file.as_path()).unwrap();
 
         let mut segmenter = Segmenter::new(Some(learner));
-        let sentence = "これはテストです。";
+
         segmenter.add_sentence(sentence);
         let result = segmenter.segment(sentence);
+
         assert!(!result.is_empty());
         assert_eq!(result.len(), 5); // Adjust based on expected segmentation
         assert_eq!(result[0], "これ");
@@ -314,6 +316,7 @@ mod tests {
     #[test]
     fn test_get_type() {
         let segmenter = Segmenter::new(None);
+
         assert_eq!(segmenter.get_type("あ"), "I"); // Hiragana
         assert_eq!(segmenter.get_type("漢"), "H"); // Kanji
         assert_eq!(segmenter.get_type("A"), "A"); // Latin

From 1a8073f89f00038e115875cca7b91c206144dd9f Mon Sep 17 00:00:00 2001
From: Minoru OSUKA <minoru.osuka@gmail.com>
Date: Wed, 4 Jun 2025 21:35:41 +0900
Subject: [PATCH 10/15] Add tests (#9)

---
 src/segmenter.rs | 71 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/src/segmenter.rs b/src/segmenter.rs
index be740c7..fb5af4a 100644
--- a/src/segmenter.rs
+++ b/src/segmenter.rs
@@ -290,6 +290,37 @@ mod tests {
 
     use super::*;
 
+    #[test]
+    fn test_add_sentence_with_writer() {
+        let mut segmenter = Segmenter::new(None);
+        let sentence = "テスト です";
+        let mut collected = Vec::new();
+
+        segmenter.add_sentence_with_writer(sentence, |attrs, label| {
+            collected.push((attrs, label));
+        });
+
+        // There should be as many instances as there are characters (excluding padding)
+        assert!(!collected.is_empty());
+
+        // Check that labels are either 1 or -1
+        for (_, label) in &collected {
+            assert!(*label == 1 || *label == -1);
+        }
+
+        // Check that attributes contain expected keys
+        let (attrs, _) = &collected[0];
+        assert!(attrs.iter().any(|a| a.starts_with("UW")));
+        assert!(attrs.iter().any(|a| a.starts_with("UC")));
+    }
+
+    #[test]
+    fn test_add_sentence_empty() {
+        let mut segmenter = Segmenter::new(None);
+        segmenter.add_sentence("");
+        // Should not panic or add anything
+    }
+
     #[test]
     fn test_segmenter() {
         let sentence = "これはテストです。";
@@ -313,6 +344,13 @@ mod tests {
         assert_eq!(result[4], "。");
     }
 
+    #[test]
+    fn test_segment_empty_sentence() {
+        let segmenter = Segmenter::new(None);
+        let result = segmenter.segment("");
+        assert!(result.is_empty());
+    }
+
     #[test]
     fn test_get_type() {
         let segmenter = Segmenter::new(None);
@@ -321,5 +359,38 @@ mod tests {
         assert_eq!(segmenter.get_type("漢"), "H"); // Kanji
         assert_eq!(segmenter.get_type("A"), "A"); // Latin
         assert_eq!(segmenter.get_type("1"), "N"); // Digit
+        assert_eq!(segmenter.get_type("@"), "O"); // Not matching any pattern
+    }
+
+    #[test]
+    fn test_get_attributes_content() {
+        let segmenter = Segmenter::new(None);
+
+        let tags = vec!["U".to_string(); 7];
+
+        let chars = vec![
+            "B3".to_string(), // index 0
+            "B2".to_string(), // index 1
+            "B1".to_string(), // index 2
+            "あ".to_string(), // index 3
+            "い".to_string(), // index 4
+            "う".to_string(), // index 5
+            "E1".to_string(), // index 6
+        ];
+
+        let types = vec![
+            "O".to_string(), // index 0
+            "O".to_string(), // index 1
+            "O".to_string(), // index 2
+            "O".to_string(), // index 3
+            "I".to_string(), // index 4
+            "I".to_string(), // index 5
+            "O".to_string(), // index 6
+        ];
+
+        let attrs = segmenter.get_attributes(4, &tags, &chars, &types);
+        assert!(attrs.contains("UW4:い"));
+        assert!(attrs.contains("UC4:I"));
+        assert!(attrs.contains("UP3:U"));
     }
 }

From f734991c7e6e3766a5a47b2f95b6cbb7398ccd7b Mon Sep 17 00:00:00 2001
From: Minoru OSUKA <minoru.osuka@gmail.com>
Date: Wed, 4 Jun 2025 21:46:28 +0900
Subject: [PATCH 11/15] Add test (#10)

---
 Cargo.lock       | 91 ++++++++++++++++++++++++++++++++++++++++++++++++
 Cargo.toml       |  3 ++
 src/extractor.rs | 38 ++++++++++++++++++++
 3 files changed, 132 insertions(+)

diff --git a/Cargo.lock b/Cargo.lock
index a878421..4594d15 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -166,6 +166,34 @@ version = "1.15.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
 
+[[package]]
+name = "errno"
+version = "0.3.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18"
+dependencies = [
+ "libc",
+ "windows-sys",
+]
+
+[[package]]
+name = "fastrand"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
+
+[[package]]
+name = "getrandom"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi",
+ "wasi",
+]
+
 [[package]]
 name = "heck"
 version = "0.5.0"
@@ -190,6 +218,12 @@ version = "0.2.172"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
 
+[[package]]
+name = "linux-raw-sys"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
+
 [[package]]
 name = "litsea"
 version = "0.1.0"
@@ -200,6 +234,7 @@ dependencies = [
  "regex",
  "serde",
  "serde_json",
+ "tempfile",
 ]
 
 [[package]]
@@ -220,6 +255,12 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "once_cell"
+version = "1.21.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
+
 [[package]]
 name = "once_cell_polyfill"
 version = "1.70.1"
@@ -244,6 +285,12 @@ dependencies = [
  "proc-macro2",
 ]
 
+[[package]]
+name = "r-efi"
+version = "5.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5"
+
 [[package]]
 name = "rayon"
 version = "1.10.0"
@@ -293,6 +340,19 @@ version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
 
+[[package]]
+name = "rustix"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266"
+dependencies = [
+ "bitflags",
+ "errno",
+ "libc",
+ "linux-raw-sys",
+ "windows-sys",
+]
+
 [[package]]
 name = "ryu"
 version = "1.0.20"
@@ -348,6 +408,19 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "tempfile"
+version = "3.20.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1"
+dependencies = [
+ "fastrand",
+ "getrandom",
+ "once_cell",
+ "rustix",
+ "windows-sys",
+]
+
 [[package]]
 name = "unicode-ident"
 version = "1.0.18"
@@ -360,6 +433,15 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
 
+[[package]]
+name = "wasi"
+version = "0.14.2+wasi-0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
+dependencies = [
+ "wit-bindgen-rt",
+]
+
 [[package]]
 name = "windows-sys"
 version = "0.59.0"
@@ -432,3 +514,12 @@ name = "windows_x86_64_msvc"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+
+[[package]]
+name = "wit-bindgen-rt"
+version = "0.39.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
+dependencies = [
+ "bitflags",
+]
diff --git a/Cargo.toml b/Cargo.toml
index ad4ee53..1083139 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,3 +21,6 @@ rayon = "1.10.0"
 regex = "1.10.5"
 serde = { version = "1.0.219", features = ["derive"] }
 serde_json = "1.0.140"
+
+[dev-dependencies]
+tempfile = "3.20.0"
diff --git a/src/extractor.rs b/src/extractor.rs
index 6efa325..fbb7b92 100644
--- a/src/extractor.rs
+++ b/src/extractor.rs
@@ -84,3 +84,41 @@ impl Extractor {
         Ok(())
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use std::fs::File;
+    use std::io::{Read, Write};
+
+    use tempfile::NamedTempFile;
+
+    #[test]
+    fn test_extract() -> Result<(), Box<dyn std::error::Error>> {
+        // Create a temporary file to simulate the corpus input
+        let mut corpus_file = NamedTempFile::new()?;
+        writeln!(corpus_file, "これ は テスト です 。")?;
+        writeln!(corpus_file, "別 の 文 も あり ます 。")?;
+        corpus_file.as_file().sync_all()?;
+
+        // Create a temporary file for the features output
+        let features_file = NamedTempFile::new()?;
+
+        // Create an instance of Extractor and extract features
+        let mut extractor = Extractor::new();
+        extractor.extract(corpus_file.path(), features_file.path())?;
+
+        // Read the output from the features file
+        let mut output = String::new();
+        File::open(features_file.path())?.read_to_string(&mut output)?;
+
+        // Check if the output is not empty
+        assert!(!output.is_empty(), "Extracted features should not be empty");
+
+        // Check if the output contains tab-separated values
+        assert!(output.contains("\t"), "Output should contain tab-separated values");
+
+        Ok(())
+    }
+}

From 094909fa4f2261158d6d2b86de63f0c861517da3 Mon Sep 17 00:00:00 2001
From: Minoru OSUKA <minoru.osuka@gmail.com>
Date: Wed, 4 Jun 2025 22:01:38 +0900
Subject: [PATCH 12/15] Tests (#11)

* Add test

* Add test

* Delete unused model
---
 resources/RWCP.model.bak | 1341 --------------------------------------
 src/trainer.rs           |   76 +++
 2 files changed, 76 insertions(+), 1341 deletions(-)
 delete mode 100644 resources/RWCP.model.bak

diff --git a/resources/RWCP.model.bak b/resources/RWCP.model.bak
deleted file mode 100644
index eef6ffa..0000000
--- a/resources/RWCP.model.bak
+++ /dev/null
@@ -1,1341 +0,0 @@
--0.0332
-UW6:ン	-0.0496
-UW6:連	0.0463
-UW6:ル	-0.0673
-UW6:か	0.0241
-UW6:業	-0.0697
-UW6:,	0.0227
-UW6:者	0.1811
-UW6:.	0.0808
-UW6:福	0.0974
-UW6:後	0.0535
-UW6:広	-0.0695
-UW6:ﾙ	-0.0673
-UW6:Ｅ１	0.0306
-UW6:ﾝ	-0.0496
-UW6:員	-0.1212
-UW6:に	-0.0149
-UW6:学	-0.096
-UW6:郎	0.1082
-UW6:E1	0.0306
-UW6:相	0.0753
-UW6:も	-0.0206
-UW6:り	0.0187
-UW6:る	-0.0135
-UW6:社	-0.0507
-UW6:を	0.0195
-UW6:な	-0.0253
-UW6:中	0.0201
-UW6:と	-0.0105
-UW6:は	-0.0236
-UW6:の	-0.0417
-UW6:っ	0.0573
-UW6:で	0.0101
-UW6:て	-0.1014
-UW6:す	0.0383
-UW6:じ	0.1782
-UW6:た	-0.0428
-UW6:こ	-0.02
-UW6:会	0.0624
-UW6:空	-0.0822
-UW6:件	-0.08
-UW6:く	-0.0121
-UW6:が	-0.0073
-UW6:あ	-0.0307
-UW6:前	0.0302
-UW6:う	0.0189
-UW6:一	-0.0277
-UW6:１	-0.027
-UW6:市	0.0887
-UW6:委	0.0798
-UW6:区	0.1792
-UW6:1	-0.027
-UW6:、	0.0227
-UW6:。	0.0808
-BW1:には	0.1498
-BW1:Ｂ１あ	0.1404
-BW1:ため	0.0601
-BW1:大阪	0.1497
-BW1:に対	-0.0912
-BW1:引き	-0.1336
-BW1:から	0.3472
-BW1:れた	0.2369
-BW1:うん	0.0665
-BW1:,同	0.0727
-BW1:毎日	-0.2113
-BW1:やむ	-0.1947
-BW1:です	0.3445
-BW1:まで	0.1711
-BW1:いる	0.0672
-BW1:、同	0.0727
-BW1:｣と	0.1682
-BW1:、と	0.066
-BW1:よっ	-0.2565
-BW1:なっ	0.3015
-BW1:日本	-0.0195
-BW1:すで	-0.3399
-BW1:平方	-0.2314
-BW1:れで	-0.0913
-BW1:とい	-0.4915
-BW1:ませ	0.2448
-BW1:つい	-0.0802
-BW1:を見	0.0731
-BW1:てき	0.1249
-BW1:それ	-0.0871
-BW1:こん	-0.1262
-BW1:でき	0.1127
-BW1:をし	0.186
-BW1:てい	0.0805
-BW1:大き	-0.2604
-BW1:B1あ	0.1404
-BW1:の中	0.0741
-BW1:して	0.1104
-BW1:では	0.0844
-BW1:さら	-0.4143
-BW1:どこ	0.3887
-BW1:京都	0.2558
-BW1:いう	0.1743
-BW1:うし	-0.4817
-BW1:ない	0.5713
-BW1:にし	0.2468
-BW1:まま	0.26
-BW1:あっ	0.1505
-BW1:の一	-0.0501
-BW1:亡く	-0.1886
-BW1:こと	0.2083
-BW1:った	0.3463
-BW1:さん	0.4573
-BW1:にも	0.1671
-BW1:なん	-0.1113
-BW1:Ｂ１同	0.0542
-BW1:とみ	0.1922
-BW1:」と	0.1682
-BW1:いっ	-0.2055
-BW1:たち	0.1122
-BW1:本当	-0.2423
-BW1:,と	0.066
-BW1:がら	0.06
-BW1:こう	-0.079
-BW1:取り	-0.2784
-BW1:目指	-0.0724
-BW1:した	0.2641
-BW1:B1同	0.0542
-BW1:そこ	0.1977
-BW1:まる	-0.2155
-BW1:など	0.7379
-BW2:との	0.072
-BW2:われ	0.7901
-BW2:とと	-0.2279
-BW2:新聞	-0.4066
-BW2:を通	-1.1877
-BW2:とみ	0.5168
-BW2:朝鮮	-0.2355
-BW2:大阪	-0.2471
-BW2:同党	0.097
-BW2:とい	0.189
-BW2:がい	0.0853
-BW2:とこ	-0.1746
-BW2:11	-0.0669
-BW2:米国	-0.4268
-BW2:れて	0.0849
-BW2:日新	-0.0722
-BW2:れば	0.4114
-BW2:うか	0.249
-BW2:くな	-0.1597
-BW2:に関	-1.1388
-BW2:こと	-0.8392
-BW2:かし	-0.135
-BW2:この	-0.4193
-BW2:なん	0.3099
-BW2:府県	-0.2363
-BW2:にし	0.2748
-BW2:会社	-0.1116
-BW2:同日	-0.0913
-BW2:れた	0.427
-BW2:かも	-0.0602
-BW2:にな	0.2454
-BW2:然と	-0.1384
-BW2:に対	-1.4943
-BW2:から	-0.7194
-BW2:上が	-0.4479
-BW2:かれ	0.4612
-BW2:んな	-0.4115
-BW2:ては	-0.311
-BW2:めて	-0.3153
-BW2:んだ	0.0728
-BW2:らか	-0.0944
-BW2:一方	-0.1375
-BW2:にお	-0.1615
-BW2:分の	-0.7758
-BW2:まし	-0.1316
-BW2:てき	0.364
-BW2:てく	0.2551
-BW2:てい	0.6144
-BW2:らに	-0.1897
-BW2:手権	-0.1982
-BW2:一日	0.097
-BW2:りし	0.0651
-BW2:はず	-0.2532
-BW2:いう	-0.1609
-BW2:少な	-0.105
-BW2:はが	-0.1033
-BW2:はい	0.1073
-BW2:ろう	0.6067
-BW2:でも	-0.4203
-BW2:りま	0.162
-BW2:日本	-0.7068
-BW2:社会	-0.1276
-BW2:らし	-0.1611
-BW2:もの	-1.0713
-BW2:させ	0.4533
-BW2:..	-1.1822
-BW2:に従	-0.4688
-BW2:東京	-0.1543
-BW2:もい	0.223
-BW2:され	1.3168
-BW2:その	-0.3744
-BW2:たい	-0.1253
-BW2:たた	-0.0662
-BW2:一部	-0.1051
-BW2:ばれ	0.1813
-BW2:委員	-0.125
-BW2:った	0.4589
-BW2:さん	-0.3977
-BW2:たは	-0.0939
-BW2:一人	0.0602
-BW2:って	0.1647
-BW2:たと	0.1224
-BW2:っと	-0.2094
-BW2:たち	-0.0786
-BW2:ただ	-0.3857
-BW2:立て	-0.099
-BW2:まれ	0.5409
-BW2:出て	0.2163
-BW2:ても	-0.3065
-BW2:まで	-0.6621
-BW2:１１	-0.0669
-BW2:年度	-0.8669
-BW2:なの	0.2614
-BW2:など	-0.6509
-BW2:でし	-0.3828
-BW2:第に	-0.1612
-BW2:曜日	-0.0601
-BW2:です	-0.4761
-BW2:なが	-0.1313
-BW2:でい	0.2666
-BW2:ない	-0.2488
-BW2:でき	-0.1528
-BW2:して	0.0972
-BW2:きた	0.1941
-BW2:ので	-0.7059
-BW2:のに	-0.6041
-BW2:しな	0.0939
-BW2:のの	-0.6125
-BW2:本人	-0.2697
-BW2:――	-0.573
-BW2:しい	-0.1819
-BW2:によ	-0.7236
-BW2:のか	0.2093
-BW2:しか	-0.0545
-BW2:年間	-0.1626
-BW2:がら	-0.3198
-BW2:とも	-0.3941
-BW2:−−	-1.3175
-BW2:した	0.5078
-BW2:日米	0.3372
-BW3:との	0.0541
-BW3:われ	-0.0605
-BW3:だ。	0.4098
-BW3:す.	-0.131
-BW3:い。	-0.1185
-BW3:か。	0.2857
-BW3:がっ	-0.0913
-BW3:がけ	-0.1127
-BW3:とう	-0.1387
-BW3:ず、	0.3426
-BW3:新聞	-0.5055
-BW3:日,	0.0974
-BW3:とし	0.2266
-BW3:会議	0.086
-BW3:がき	-0.4855
-BW3:れて	0.1375
-BW3:する	0.6521
-BW3:ず,	0.3426
-BW3:に,	-0.1021
-BW3:られ	0.682
-BW3:かけ	-0.0743
-BW3:こと	0.7397
-BW3:この	0.1542
-BW3:かに	-0.0669
-BW3:かっ	-0.4098
-BW3:てい	0.624
-BW3:が、	0.1816
-BW3:うち	0.1117
-BW3:れた	0.185
-BW3:にな	0.1906
-BW3:には	0.2644
-BW3:かり	-0.267
-BW3:から	0.652
-BW3:は、	0.1337
-BW3:まっ	-0.1549
-BW3:まで	0.6154
-BW3:んで	0.0798
-BW3:んだ	0.0606
-BW3:うと	0.4798
-BW3:ころ	-0.2757
-BW3:ます	0.6943
-BW3:てお	0.0855
-BW3:入り	0.1232
-BW3:にし	0.1771
-BW3:いえ	0.2079
-BW3:す。	-0.131
-BW3:いく	0.3029
-BW3:ずに	0.0841
-BW3:いい	0.5308
-BW3:るる	0.3818
-BW3:れ、	0.0854
-BW3:いた	0.2056
-BW3:の子	-0.1
-BW3:だっ	0.1004
-BW3:いっ	0.1883
-BW3:ｶ月	0.099
-BW3:か.	0.2857
-BW3:けど	0.1374
-BW3:た。	0.8875
-BW3:社会	0.2024
-BW3:さい	-0.0714
-BW3:らし	0.1479
-BW3:い.	-0.1185
-BW3:始め	0.1681
-BW3:の、	-0.0724
-BW3:が,	0.1816
-BW3:たい	-0.0594
-BW3:った	-0.4748
-BW3:さを	0.0976
-BW3:たの	0.0812
-BW3:日、	0.0974
-BW3:って	0.03
-BW3:べき	0.2181
-BW3:の,	-0.0724
-BW3:に、	-0.1021
-BW3:そう	0.0428
-BW3:カ月	0.099
-BW3:まれ	-0.0793
-BW3:ても	0.0302
-BW3:大会	0.2217
-BW3:たり	-0.1183
-BW3:たる	-0.0853
-BW3:では	0.2295
-BW3:など	0.2135
-BW3:いる	0.56
-BW3:し、	0.1557
-BW3:いわ	0.1527
-BW3:た.	0.8875
-BW3:ある	0.3846
-BW3:あり	0.0719
-BW3:れる	0.1091
-BW3:でに	-0.1482
-BW3:は,	0.1337
-BW3:です	0.1437
-BW3:なく	-0.0903
-BW3:ない	0.1796
-BW3:して	0.1449
-BW3:市	0.0965
-BW3:きた	0.1645
-BW3:しな	0.2608
-BW3:れ,	0.0854
-BW3:どう	0.4664
-BW3:しま	0.12
-BW3:まし	0.1113
-BW3:だ.	0.4098
-BW3:しい	-0.3714
-BW3:し,	0.1557
-BW3:えと	0.1454
-BW3:れば	-0.3246
-BW3:あた	-0.2194
-BW3:がり	-0.2064
-BW3:がら	-0.4977
-BW3:とも	-0.3543
-BW3:した	0.3562
-UC3:A	-0.137
-UC3:I	0.2311
-TW4:からな	-0.2348
-TW4:ません	0.1097
-TW4:という	0.1349
-TW4:ました	0.5543
-TW4:ようと	-0.4258
-TW4:たが、	0.1516
-TW4:してい	0.2958
-TW4:たが,	0.1516
-TW4:ている	0.1538
-TW4:いう。	0.8576
-TW4:いう.	0.8576
-TW4:よると	0.5865
-UC1:A	0.0484
-UC1:K	0.0093
-UC1:M	0.0645
-UC1:O	-0.0505
-UC6:I	-0.0253
-UC6:H	-0.0506
-UC6:K	0.0087
-UC6:M	0.0247
-UC6:O	-0.0387
-UW3:・	-0.3794
-UW3:調	-0.0562
-UW3:ン	0.0278
-UW3:ロ	0.2201
-UW3:ル	0.1591
-UW3:度	0.1452
-UW3:非	0.2066
-UW3:ム	0.1109
-UW3:府	0.1605
-UW3:ト	0.0521
-UW3:く	0.1004
-UW3:ッ	-0.135
-UW3:広	-0.103
-UW3:李	0.3094
-UW3:部	0.12
-UW3:予	-0.1193
-UW3:郡	0.4404
-UW3:二	0.0974
-UW3:法	0.1868
-UW3:員	0.4513
-UW3:森	0.2438
-UW3:村	0.0364
-UW3:郎	0.1026
-UW3:ｸﾞ	0.1319
-UW3:力	0.0365
-UW3:い	0.1006
-UW3:東	-0.0805
-UW3:ほ	-0.5516
-UW3:へ	0.1199
-UW3:主	-0.0758
-UW3:ま	-0.4384
-UW3:ひ	-0.2171
-UW3:共	-0.188
-UW3:ふ	-0.1798
-UW3:開	-0.1432
-UW3:六	0.0755
-UW3:公	-0.303
-UW3:中	0.0653
-UW3:と	0.1691
-UW3:は	0.4555
-UW3:全	0.1574
-UW3:間	0.1302
-UW3:っ	-0.1444
-UW3:ち	-0.0521
-UW3:型	0.1389
-UW3:で	0.2318
-UW3:て	0.6167
-UW3:両	0.3815
-UW3:つ	-0.1081
-UW3:せ	0.3685
-UW3:す	0.0584
-UW3:た	0.0842
-UW3:そ	-0.5228
-UW3:党	0.3593
-UW3:こ	-0.3552
-UW3:げ	0.0401
-UW3:け	0.0388
-UW3:し	-0.0395
-UW3:さ	-0.1058
-UW3:ご	-0.3116
-UW3:か	-0.1163
-UW3:お	-0.4864
-UW3:え	0.1983
-UW3:下	-0.1759
-UW3:が	0.3271
-UW3:あ	-0.2696
-UW3:う	0.2342
-UW3:元	0.4858
-UW3:一	-0.1619
-UW3:政	-0.2013
-UW3:区	0.4646
-UW3:税	0.0401
-UW3:系	0.3066
-UW3:化	0.1327
-UW3:北	-0.1038
-UW3:口	0.0483
-UW3:右	0.1233
-UW3:駅	0.162
-UW3:戸	-0.0488
-UW3:知	-0.1528
-UW3:−	-0.1723
-UW3:妻	0.2016
-UW3:金	0.2163
-UW3:込	-0.1504
-UW3:無	0.0979
-UW3:よ	-0.0202
-UW3:わ	-0.1207
-UW3:を	0.662
-UW3:学	-0.1356
-UW3:当	-0.3885
-UW3:保	-0.2439
-UW3:再	0.3095
-UW3:円	0.5807
-UW3:約	0.3663
-UW3:的	0.7313
-UW3:級	0.1384
-UW3:ｱ	0.0551
-UW3:ｽ	0.0874
-UW3:1	-0.08
-UW3:･	-0.3794
-UW3:ｯ	-0.135
-UW3:市	0.3197
-UW3:用	0.0914
-UW3:能	0.0725
-UW3:別	0.1129
-UW3:昨	-0.0661
-UW3:町	0.1215
-UW3:何	0.4265
-UW3:初	0.2475
-UW3:作	-0.0361
-UW3:決	-0.1073
-UW3:低	0.0811
-UW3:生	-0.0273
-UW3:月	0.4125
-UW3:数	0.3222
-UW3:最	-0.0937
-UW3:選	-0.0681
-UW3:雨	0.2009
-UW3:立	-0.096
-UW3:期	0.036
-UW3:電	-0.1045
-UW3:｣	0.267
-UW3:費	0.1777
-UW3:業	0.0484
-UW3:,	0.4889
-UW3:者	0.6457
-UW3:教	-0.1479
-UW3:務	-0.1872
-UW3:動	-0.0949
-UW3:財	-0.0733
-UW3:指	-0.3973
-UW3:車	0.1835
-UW3:軍	0.1375
-UW3:国	0.0642
-UW3:統	-0.4229
-UW3:直	-0.1835
-UW3:日	0.2099
-UW3:旧	0.5792
-UW3:千	-0.2309
-UW3:午	-0.0783
-UW3:協	-0.1006
-UW3:外	-0.0241
-UW3:建	-0.2352
-UW3:特	-0.385
-UW3:自	-0.2869
-UW3:物	0.0461
-UW3:平	-0.1804
-UW3:海	-0.0495
-UW3:人	0.2742
-UW3:〓	-0.3573
-UW3:」	0.267
-UW3:、	0.4889
-UW3:々	-0.2311
-UW3:長	0.0421
-UW3:〇	0.5827
-UW3:思	-0.1291
-UW3:安	-0.0423
-UW3:州	0.1155
-UW3:み	-0.012
-UW3:実	-0.1008
-UW3:得	0.1905
-UW3:通	-0.1136
-UW3:性	0.1822
-UW3:同	0.3906
-UW3:合	-0.0241
-UW3:各	0.3588
-UW3:時	-0.1248
-UW3:ﾛ	0.2201
-UW3:ﾙ	0.1591
-UW3:家	0.1078
-UW3:ﾝ	0.0278
-UW3:ﾑ	0.1109
-UW3:見	0.1044
-UW3:ﾄ	0.0521
-UW3:新	0.1764
-UW3:に	0.2745
-UW3:な	-0.2788
-UW3:文	-0.1489
-UW3:ど	-0.0899
-UW3:米	0.7767
-UW3:の	0.4056
-UW3:も	0.2323
-UW3:め	0.1205
-UW3:や	-0.0788
-UW3:り	0.0649
-UW3:る	0.5905
-UW3:氏	0.2613
-UW3:ら	0.0727
-UW3:今	0.0792
-UW3:核	0.5156
-UW3:れ	0.2773
-UW3:他	0.1889
-UW3:ん	-0.0518
-UW3:民	-0.1694
-UW3:場	0.1219
-UW3:副	0.4437
-UW3:ア	0.0551
-UW3:分	0.0457
-UW3:以	-0.1368
-UW3:曜	-0.0951
-UW3:グ	0.1319
-UW3:年	0.2416
-UW3:和	-0.0837
-UW3:県	0.6293
-UW3:ス	0.0874
-UW3:前	0.2286
-UW3:１	-0.08
-UW3:総	0.1163
-UW3:少	-0.3102
-UW3:小	-0.0513
-UW3:線	0.1255
-UW3:第	0.1201
-UW3:関	-0.1282
-UW3:英	0.0785
-UW3:私	0.4231
-UW3:世	-0.2087
-UW3:省	0.0792
-UW2:行	0.0838
-UW2:最	-0.063
-UW2:調	0.101
-UW2:立	-0.0763
-UW2:朝	-0.1843
-UW2:本	-0.165
-UW2:,	-0.0829
-UW2:ッ	0.0831
-UW2:事	0.0492
-UW2:目	-0.1584
-UW2:相	-0.0242
-UW2:人	-0.0123
-UW2:東	-0.0931
-UW2:べ	0.1261
-UW2:主	-0.0861
-UW2:ま	0.06
-UW2:太	-0.0483
-UW2:ひ	-0.1273
-UW2:天	-0.0865
-UW2:強	0.1067
-UW2:開	0.1758
-UW2:に	-0.1764
-UW2:な	0.1063
-UW2:ど	0.1273
-UW2:と	-0.0981
-UW2:は	-0.0409
-UW2:の	0.013
-UW2:間	-0.1257
-UW2:入	0.0548
-UW2:だ	0.1837
-UW2:で	-0.0268
-UW2:て	-0.0291
-UW2:つ	-0.0949
-UW2:せ	0.03
-UW2:す	-0.0675
-UW2:た	0.0188
-UW2:そ	-0.1011
-UW2:こ	0.1141
-UW2:世	-0.0302
-UW2:し	0.1529
-UW2:ざ	0.054
-UW2:さ	0.0878
-UW2:か	0.1454
-UW2:お	-0.0502
-UW2:不	-0.215
-UW2:く	-0.0412
-UW2:三	-0.0758
-UW2:が	-0.0856
-UW2:あ	-0.0538
-UW2:う	0.0134
-UW2:い	0.0505
-UW2:政	0.1522
-UW2:区	-0.0422
-UW2:自	-0.1353
-UW2:揺	-0.1033
-UW2:大	-0.1769
-UW2:理	0.0752
-UW2:「	-0.0645
-UW2:」	0.3145
-UW2:次	-0.2378
-UW2:、	-0.0829
-UW2:発	0.0529
-UW2:〇	0.0892
-UW2:実	0.1023
-UW2:西	-0.0744
-UW2:込	0.3041
-UW2:日	-0.1815
-UW2:見	-0.3874
-UW2:子	-0.1519
-UW2:新	-0.1682
-UW2:学	0.076
-UW2:保	0.0362
-UW2:文	-0.1355
-UW2:中	-0.0968
-UW2:手	-0.1519
-UW2:米	0.0509
-UW2:も	-0.1263
-UW2:や	-0.0402
-UW2:り	-0.0579
-UW2:る	-0.0694
-UW2:よ	0.1639
-UW2:れ	0.0571
-UW2:を	-0.2516
-UW2:ん	0.2095
-UW2:気	-0.174
-UW2:民	-0.018
-UW2:副	-0.1566
-UW2:ア	-0.0587
-UW2:ｱ	-0.0587
-UW2:果	-0.0665
-UW2:ｷ	0.0568
-UW2:ｶ	0.0306
-UW2:カ	0.0306
-UW2:キ	0.0568
-UW2:｣	0.3145
-UW2:｢	-0.0645
-UW2:年	-0.106
-UW2:ｯ	0.0831
-UW2:市	-0.0813
-UW2:議	0.1198
-UW2:小	-0.2009
-UW2:第	0.081
-UW2:初	-0.3025
-UW2:北	-0.3414
-UW2:明	-0.1462
-UW2:県	-0.1165
-UW2:会	0.0978
-TC4:IOO	0.0054
-TC4:HIH	0.0804
-TC4:HII	0.0679
-TC4:IIO	0.0656
-TC4:III	0.1497
-TC4:IIH	0.0321
-TC4:IHO	-0.2324
-TC4:MOM	0.0841
-TC4:MHH	-0.0405
-TC4:MHI	0.0201
-TC4:HOH	0.0446
-TC4:KAK	0.4845
-TC4:HHO	0.0669
-TC4:MMM	0.0661
-TC4:IHH	0.0695
-TC4:MMH	-0.0241
-TC4:KKK	0.3065
-TC4:HHK	0.0365
-TC4:HHI	0.1344
-TC4:HHH	-0.0203
-TC4:KKA	0.3386
-TC4:HHN	0.0182
-TC4:HHM	-0.0122
-TQ3:BIIH	-0.0116
-TQ3:BIII	-0.0105
-TQ3:OKHH	0.0587
-TQ3:OIIH	0.1344
-TQ3:BHII	-0.0504
-TQ3:BHIH	0.0222
-TQ3:OOHH	0.011
-TQ3:OKAK	0.2792
-TQ3:BHHH	0.0478
-TQ3:BOMH	0.062
-TQ3:BHHM	-0.1073
-TQ3:OIHH	0.0623
-TQ3:BMHM	-0.0464
-TQ3:OOII	-0.0685
-TQ3:OKKA	0.0679
-TQ3:BMHI	-0.0863
-TQ3:OHHI	0.1729
-TQ3:OHHH	0.0346
-TQ3:OHMH	0.0481
-TQ3:OHII	0.0997
-TC2:OII	-0.2649
-TC2:HMM	-0.1154
-TC2:IHI	-0.1965
-TC2:KKH	0.0703
-TC2:HII	-0.1023
-TC2:HHO	0.2088
-TC3:KOK	-0.1009
-TC3:AAA	-0.0294
-TC3:NNO	0.0662
-TC3:OHO	-0.3393
-TC3:NNH	-0.1689
-TC3:KHH	-0.1216
-TC3:IOI	-0.0542
-TC3:IIM	-0.1035
-TC3:HII	-0.1088
-TC3:HIK	0.0731
-TC3:IIH	-0.0825
-TC3:IHO	-0.1935
-TC3:MHO	0.0123
-TC3:MHM	-0.0457
-TC3:MHH	-0.2694
-TC3:HOH	-0.1486
-TC3:KKH	-0.1217
-TC3:IHH	0.0128
-TC3:IHI	-0.3041
-TC3:MMH	-0.0471
-TC3:HHI	-0.0341
-TC3:HHH	0.0346
-TC3:KKA	0.0491
-UW5:月	-0.4353
-UW5:ン	-0.0343
-UW5:ル	0.0451
-UW5:挙	0.1618
-UW5:語	-0.1073
-UW5:,	0.0465
-UW5:者	-0.2233
-UW5:務	0.3519
-UW5:Ｅ２	-3.2768
-UW5:員	0.2104
-UW5:郎	-0.0368
-UW5:京	0.0722
-UW5:相	0.1319
-UW5:統	0.1955
-UW5:い	0.0331
-UW5:べ	0.1001
-UW5:み	0.0502
-UW5:大	-0.1296
-UW5:日	0.0218
-UW5:に	-0.1224
-UW5:な	-0.0787
-UW5:ど	0.1682
-UW5:と	-0.0127
-UW5:は	-0.0578
-UW5:の	-0.0635
-UW5:間	0.1191
-UW5:っ	0.0052
-UW5:ち	0.1093
-UW5:だ	-0.1186
-UW5:で	-0.085
-UW5:て	-0.0018
-UW5:つ	0.0921
-UW5:す	-0.0852
-UW5:党	-0.0654
-UW5:研	-0.0997
-UW5:げ	-0.0983
-UW5:し	-0.1371
-UW5:空	-0.0813
-UW5:さ	-0.1537
-UW5:か	0.0647
-UW5:お	0.0527
-UW5:え	0.1199
-UW5:く	0.0312
-UW5:ぎ	0.1971
-UW5:き	0.1624
-UW5:が	-0.0421
-UW5:あ	0.1655
-UW5:う	-0.0503
-UW5:E2	-3.2768
-UW5:表	0.0663
-UW5:区	-0.0901
-UW5:「	0.0363
-UW5:館	-0.0689
-UW5:、	0.0465
-UW5:。	-0.0299
-UW5:長	0.0786
-UW5:査	0.0932
-UW5:題	0.2368
-UW5:思	0.0872
-UW5:機	-0.1508
-UW5:定	0.1785
-UW5:.	-0.0299
-UW5:格	0.1356
-UW5:氏	-0.1347
-UW5:ﾙ	0.0451
-UW5:ﾝ	-0.0343
-UW5:社	-0.0278
-UW5:新	-0.1682
-UW5:学	-0.0548
-UW5:中	-0.0871
-UW5:所	-0.0814
-UW5:ゃ	0.335
-UW5:め	0.0865
-UW5:ょ	0.0854
-UW5:り	-0.0208
-UW5:る	0.0429
-UW5:的	-0.3149
-UW5:わ	0.0419
-UW5:れ	0.0504
-UW5:を	-0.1264
-UW5:ん	0.0327
-UW5:ｲ	0.0241
-UW5:イ	0.0241
-UW5:会	-0.1153
-UW5:嵐	-0.1304
-UW5:1	-0.0514
-UW5:｢	0.0363
-UW5:年	0.1763
-UW5:１	-0.0514
-UW5:市	-0.2991
-UW5:議	0.1219
-UW5:田	0.024
-UW5:選	-0.1018
-UW5:町	-0.3912
-UW5:]	-0.2762
-UW5:席	0.0921
-UW5:告	0.0848
-UW5:県	-0.4003
-UW5:省	-0.1052
-TC1:AAA	0.1093
-TC1:HOM	-0.0331
-TC1:HOH	-0.039
-TC1:OOI	-0.1832
-TC1:IOM	0.0467
-TC1:IHI	0.1169
-TC1:MMH	0.0187
-TC1:IOI	-0.1015
-TC1:IOH	-0.0142
-TC1:HII	0.0998
-TC1:HHH	0.1029
-TC1:HHM	0.058
-UC4:A	-0.2643
-UC4:I	-0.1032
-UC4:H	0.1809
-UC4:K	-0.345
-UC4:M	0.3565
-UC4:O	0.6646
-UC4:N	0.3876
-UQ2:OK	0.1759
-UQ2:BH	0.0216
-UQ2:BI	0.0113
-UW4:ー	-1.187
-UW4:行	-0.0792
-UW4:規	0.0792
-UW4:・	-0.4371
-UW4:園	-0.12
-UW4:ン	-0.3637
-UW4:ラ	-0.0881
-UW4:ル	-0.0856
-UW4:リ	-0.0541
-UW4:メ	-0.1635
-UW4:ぎ	-0.3821
-UW4:地	0.0866
-UW4:ト	-0.0403
-UW4:庁	-0.4556
-UW4:ッ	-0.0724
-UW4:率	0.0672
-UW4:予	0.0782
-UW4:事	-0.019
-UW4:井	-0.1768
-UW4:員	-0.091
-UW4:郎	-0.4866
-UW4:塁	-0.2094
-UW4:署	0.0749
-UW4:来	-0.0442
-UW4:力	-0.0302
-UW4:い	-0.3435
-UW4:賞	0.073
-UW4:ほ	0.1464
-UW4:べ	-0.0744
-UW4:へ	0.6665
-UW4:み	-0.2082
-UW4:ま	0.1051
-UW4:び	-0.4134
-UW4:ひ	0.4249
-UW4:ば	0.194
-UW4:共	-0.1212
-UW4:ふ	0.1345
-UW4:に	0.6499
-UW4:な	0.5433
-UW4:中	0.221
-UW4:と	0.4547
-UW4:は	0.8578
-UW4:の	0.7396
-UW4:ね	0.1413
-UW4:ぬ	0.1853
-UW4:っ	-0.5882
-UW4:ち	-0.3654
-UW4:だ	0.5408
-UW4:で	0.741
-UW4:て	0.3994
-UW4:つ	-0.1659
-UW4:せ	0.0181
-UW4:ず	0.1251
-UW4:す	-0.0731
-UW4:じ	-0.2506
-UW4:た	0.5034
-UW4:そ	0.4091
-UW4:党	-0.2006
-UW4:こ	0.2255
-UW4:げ	-0.4734
-UW4:け	-0.4376
-UW4:し	-0.0843
-UW4:さ	0.2864
-UW4:ご	0.1979
-UW4:か	0.053
-UW4:お	0.2405
-UW4:え	-0.2514
-UW4:く	-0.3788
-UW4:先	0.0601
-UW4:き	-0.4482
-UW4:が	0.6006
-UW4:あ	0.4752
-UW4:う	-0.064
-UW4:一	-0.2069
-UW4:島	-0.2056
-UW4:改	0.0787
-UW4:士	-0.1413
-UW4:政	0.2182
-UW4:区	0.4517
-UW4:野	-0.11
-UW4:支	0.0856
-UW4:系	0.0786
-UW4:館	-0.1984
-UW4:化	0.0776
-UW4:参	0.1555
-UW4:込	-0.337
-UW4:.	0.3508
-UW4:よ	0.3351
-UW4:子	-0.4802
-UW4:学	-0.1397
-UW4:感	0.0916
-UW4:校	-0.036
-UW4:般	-0.0852
-UW4:内	0.0584
-UW4:円	0.0788
-UW4:題	-0.0792
-UW4:高	0.212
-UW4:約	0.2171
-UW4:的	0.2586
-UW4:銀	-0.2213
-UW4:屋	-0.1328
-UW4:済	-0.0543
-UW4:ｰ	-1.187
-UW4:輪	-0.1433
-UW4:山	-0.15
-UW4:ｺ	0.1789
-UW4:ｾ	0.1287
-UW4:｣	0.3798
-UW4:｢	0.1895
-UW4:際	-0.2604
-UW4:･	-0.4371
-UW4:ｯ	-0.0724
-UW4:産	-0.1101
-UW4:市	0.2771
-UW4:能	-0.073
-UW4:田	-0.29
-UW4:選	0.2596
-UW4:町	0.1826
-UW4:間	-0.2344
-UW4:ｶ	0.2145
-UW4:体	-0.1286
-UW4:初	0.1347
-UW4:作	0.053
-UW4:カ	0.2145
-UW4:寺	-0.0809
-UW4:側	0.4292
-UW4:道	-0.1291
-UW4:生	-0.1286
-UW4:月	-0.9066
-UW4:都	0.1192
-UW4:最	0.0845
-UW4:立	-0.2112
-UW4:電	-0.0878
-UW4:沢	-0.0939
-UW4:業	-0.1043
-UW4:,	0.393
-UW4:者	0.2145
-UW4:教	0.0704
-UW4:務	-0.2715
-UW4:動	-0.074
-UW4:車	-0.1481
-UW4:回	0.15
-UW4:軍	0.1158
-UW4:経	0.1146
-UW4:国	-0.0619
-UW4:目	0.0922
-UW4:統	-0.1169
-UW4:大	0.0571
-UW4:日	0.1798
-UW4:谷	-0.1
-UW4:空	-0.0867
-UW4:協	0.1013
-UW4:多	0.1067
-UW4:領	-0.1659
-UW4:物	-0.0735
-UW4:人	0.1036
-UW4:〓	-0.5156
-UW4:球	-0.1267
-UW4:「	0.1895
-UW4:」	0.3798
-UW4:、	0.393
-UW4:。	0.3508
-UW4:長	0.0357
-UW4:〇	0.4999
-UW4:川	-0.2667
-UW4:定	-0.1057
-UW4:性	0.0553
-UW4:合	-0.1834
-UW4:後	0.0456
-UW4:時	0.1829
-UW4:首	0.1749
-UW4:ﾙ	-0.0856
-UW4:近	0.0929
-UW4:ﾒ	-0.1635
-UW4:ﾗ	-0.0881
-UW4:方	-0.0856
-UW4:―	-0.4841
-UW4:ﾄ	-0.0403
-UW4:文	0.0522
-UW4:所	-0.1566
-UW4:米	0.2937
-UW4:も	0.4169
-UW4:ゃ	-0.2666
-UW4:む	-0.0882
-UW4:め	-0.5046
-UW4:ょ	-0.1544
-UW4:や	0.2795
-UW4:院	-0.2297
-UW4:り	-0.9726
-UW4:る	-1.4896
-UW4:氏	0.5388
-UW4:ら	-0.2922
-UW4:わ	-0.1783
-UW4:れ	-0.2613
-UW4:ろ	-0.457
-UW4:を	1.315
-UW4:ん	-0.2352
-UW4:気	-0.091
-UW4:民	-0.2716
-UW4:場	-0.141
-UW4:ﾘ	-0.0541
-UW4:副	0.3879
-UW4:以	0.0544
-UW4:会	0.095
-UW4:ﾝ	-0.3637
-UW4:コ	0.1789
-UW4:年	0.0374
-UW4:和	-0.0681
-UW4:セ	0.1287
-UW4:前	0.1623
-UW4:器	-0.0851
-UW4:総	0.094
-UW4:議	-0.0244
-UW4:小	0.191
-UW4:警	-0.1184
-UW4:線	-0.0994
-UW4:第	0.0788
-UW4:県	0.2997
-UW4:木	-0.0485
-UW4:省	-0.3485
-UQ3:ON	-0.3212
-UQ3:BA	-0.0479
-UQ3:OI	-0.0827
-UQ3:BM	0.316
-UQ3:BN	0.6427
-UQ3:BO	1.4761
-UQ3:BH	0.0042
-UQ3:BI	0.1913
-UQ3:BK	-0.7198
-TQ1:OIHI	0.02
-TQ1:OIIH	-0.0068
-TQ1:BIII	0.1595
-TQ1:OAKK	0.0482
-TQ1:BIHH	0.006
-TQ1:BHIH	-0.0132
-TQ1:BHHH	-0.0227
-TQ1:BHHI	0.0316
-TQ1:BOHH	0.0225
-TQ1:BOOO	-0.0908
-TQ1:OHHH	0.0281
-TQ1:BNHH	-0.0744
-TQ1:OHIH	0.0249
-UC5:I	-0.1238
-UC5:H	0.0313
-UC5:K	-0.0799
-UC5:M	0.0539
-UC5:O	-0.0831
-TQ4:BIIH	-0.0607
-TQ4:BIII	-0.2181
-TQ4:OAKK	0.018
-TQ4:OIIH	0.0626
-TQ4:BHII	-0.0966
-TQ4:OIHI	-0.0493
-TQ4:BHHH	-0.0721
-TQ4:OIII	-0.4007
-TQ4:BHHM	-0.3604
-TQ4:OIHH	0.1935
-TQ4:OHIH	-0.1573
-TQ4:OKAK	-0.8156
-TQ4:OHHI	0.2446
-TQ4:OHHH	-0.0294
-TQ4:OAAA	-0.2763
-TQ4:OHHO	0.048
-TW2:その後	-0.443
-TW2:社会党	-0.3216
-TW2:もので	0.1882
-TW2:ていた	0.1833
-TW2:大きな	-0.1255
-TW2:ころが	-0.2434
-TW2:同時に	-0.8097
-TW2:一気に	-0.0792
-TW2:ともに	-0.4517
-TW2:だって	-0.1049
-TW2:対して	-0.2721
-TW2:として	-0.4657
-TW2:いった	-0.1256
-TW2:ある程	-0.2049
-TW2:初めて	-0.1512
-TW2:しょう	0.3873
-TW1:東京都	0.2026
-TW1:につい	-0.4681
-UW1:も	-0.0466
-UW1:主	-0.0402
-UW1:大	0.0561
-UW1:や	-0.047
-UW1:･	-0.0135
-UW1:り	0.0208
-UW1:日	-0.0141
-UW1:よ	0.0182
-UW1:ら	-0.0292
-UW1:区	-0.0912
-UW1:れ	0.0169
-UW1:京	-0.0268
-UW1:に	-0.0789
-UW1:ん	-0.0137
-UW1:ど	-0.0123
-UW1:と	-0.0547
-UW1:は	-0.0847
-UW1:の	-0.0185
-UW1:都	-0.0718
-UW1:あ	-0.0941
-UW1:市	-0.0411
-UW1:委	0.0729
-UW1:で	-0.0201
-UW1:県	-0.0386
-UW1:を	-0.0446
-UW1:国	-0.046
-UW1:・	-0.0135
-UW1:こ	0.0505
-UW1:理	0.0361
-UW1:午	0.0871
-UW1:,	0.0156
-UW1:｢	-0.0463
-UW1:「	-0.0463
-UW1:き	0.0121
-UW1:が	-0.0553
-UW1:、	0.0156
-UW1:う	-0.0127
-UW1:生	-0.0408
-UP3:B	0.0189
-BP1:OO	-0.0125
-BP1:OB	0.0304
-BP1:BB	0.0295
-BP1:UB	0.0352
-TW3:いただ	-0.1734
-TW3:してい	0.1314
-TW3:十二月	-0.2287
-TW3:れから	-0.3752
-TW3:のもの	-0.06
-TW3:にとっ	-0.5989
-TW3:に当た	-0.6247
-TW3:ので、	-0.0727
-TW3:ので,	-0.0727
-TW3:につい	-0.5483
-TW3:として	-0.4314
-BQ4:BMI	-0.3385
-BQ4:OAH	0.0926
-BQ4:BOO	-1.2396
-BQ4:OHH	0.0266
-BQ4:BHH	-0.3895
-BQ4:ONN	-0.0973
-BQ4:BIK	0.1348
-BQ4:BIH	0.3761
-BQ4:BII	-0.4654
-BQ4:OHK	-0.2036
-BQ4:BKK	-0.1806
-BP2:OO	-0.1762
-BP2:BO	0.006
-BQ2:BHI	-0.1159
-BQ2:BHH	0.0118
-BQ2:UHI	-0.1146
-BQ2:BHM	0.0466
-BQ2:BIH	-0.0919
-BQ2:OHM	-0.0181
-BQ2:OHH	-0.1139
-BQ2:BKO	0.0864
-BQ2:OIH	0.0153
-BQ2:BKK	-0.172
-BQ3:BHI	0.2664
-BQ3:BHH	-0.0792
-BQ3:OHM	0.0439
-BQ3:OHH	0.2174
-BQ3:OII	0.028
-BQ3:BII	-0.0299
-BQ3:BMH	0.0937
-BQ3:OMH	-0.2402
-BQ3:BKI	0.0419
-BQ3:BMM	0.8335
-BQ3:BOH	0.0775
-BQ3:BNN	0.0998
-BQ3:OKI	-0.0793
-BQ3:OKH	0.1798
-BQ3:OOO	1.1699
-BQ3:OKO	-0.2242
-TQ2:BIII	-0.1033
-TQ2:BIHH	-0.1401
-TQ2:BKAK	-0.0543
-TQ2:BOOO	-0.5591
-BQ1:BOH	-0.0091
-BQ1:BNH	0.0449
-BQ1:BOO	-0.2597
-BQ1:BHH	0.115
-BQ1:BIM	0.0886
-BQ1:BHM	0.1521
-BQ1:OHI	0.0451
-BQ1:BII	-0.1158
-BQ1:BMH	0.1208
-BQ1:OIH	-0.0296
-BQ1:OKA	0.1851
-BQ1:OKH	-0.102
-BQ1:OKK	0.0904
-BQ1:OOO	0.2965
-UQ1:OO	-0.2422
-UQ1:OK	0.041
-UQ1:OI	0.0477
-UQ1:OH	-0.0095
-UQ1:BN	0.0142
-UQ1:BO	-0.0056
-UQ1:BH	0.0021
-UQ1:BI	-0.0012
-UQ1:BK	-0.0099
-UC2:A	0.0819
-UC2:I	0.0409
-UC2:H	0.1059
-UC2:M	0.3987
-UC2:O	0.0646
-UC2:N	0.5775
-UP1:O	-0.0214
-UP2:B	0.0069
-UP2:O	0.0935
-BC1:II	0.2461
-BC1:HH	0.0006
-BC1:KH	0.0406
-BC1:OH	-0.1378
-BC2:AA	-0.3267
-BC2:OO	-0.292
-BC2:AI	0.2744
-BC2:KI	0.3831
-BC2:IK	0.1721
-BC2:MK	0.3334
-BC2:AN	-0.0878
-BC2:II	-0.1332
-BC2:IH	-0.1184
-BC2:HH	-0.407
-BC2:MH	-0.3132
-BC2:HN	0.4012
-BC2:HO	0.3761
-BC2:IO	0.5492
-BC2:HM	-0.1711
-BC2:IA	0.1327
-BC2:KK	-0.8741
-BC3:HK	-0.0721
-BC3:HH	0.0996
-BC3:HI	0.0626
-BC3:HN	-0.1307
-BC3:HO	-0.0836
-BC3:IH	-0.0301
-BC3:KK	0.2762
-BC3:OH	0.0266
-BC3:OA	-0.1652
-BC3:MM	0.4034
-BC3:MK	0.1079
diff --git a/src/trainer.rs b/src/trainer.rs
index 1bce18d..4750dea 100644
--- a/src/trainer.rs
+++ b/src/trainer.rs
@@ -83,3 +83,79 @@ impl Trainer {
         Ok(self.learner.get_metrics())
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::adaboost::Metrics;
+    use std::io::Write;
+    use std::sync::atomic::AtomicBool;
+    use std::sync::Arc;
+    use tempfile::NamedTempFile;
+
+    // Helper: create a dummy features file.
+    // This file should contain at least one line for initialize_features and initialize_instances.
+    fn create_dummy_features_file() -> NamedTempFile {
+        let mut file = NamedTempFile::new().expect("Failed to create temp file for features");
+
+        // For example, it could contain "1 feature1" to represent one feature.
+        writeln!(file, "1 feature1").expect("Failed to write to features file");
+        file
+    }
+
+    // Helper: create a dummy model file.
+    // This file should contain the model weights and bias.
+    fn create_dummy_model_file() -> NamedTempFile {
+        let mut file = NamedTempFile::new().expect("Failed to create temp file for model");
+
+        // For example, it could contain a single feature weight and a bias term.
+        // The feature line is "BW1:こん	-0.1262" and the last line is the bias term "100.0".
+        writeln!(file, "BW1:こん\t-0.1262").expect("Failed to write feature");
+        writeln!(file, "100.0").expect("Failed to write bias");
+        file
+    }
+
+    #[test]
+    fn test_load_model() -> Result<(), Box<dyn std::error::Error>> {
+        // Prepare a dummy features file
+        let features_file = create_dummy_features_file();
+
+        // Create a Trainer instance
+        let mut trainer = Trainer::new(0.01, 10, 1, features_file.path());
+
+        // Prepare a dummy model file
+        let model_file = create_dummy_model_file();
+
+        // Load the model file into the Trainer
+        // This should not return an error if the model file is correctly formatted.
+        // If the model file is not correctly formatted, it will return an error.
+        trainer.load_model(model_file.path())?;
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_train() -> Result<(), Box<dyn std::error::Error>> {
+        // Prepare a dummy features file
+        let features_file = create_dummy_features_file();
+
+        // Create a Trainer instance with the dummy features file
+        let mut trainer = Trainer::new(0.01, 5, 1, features_file.path());
+
+        // Prepare a temporary file for the model output
+        let model_out = NamedTempFile::new()?;
+
+        // Set the AtomicBool to false so that the learning loop exits immediately
+        let running = Arc::new(AtomicBool::new(false));
+
+        // Execute the train method.
+        let metrics: Metrics = trainer.train(running, model_out.path())?;
+
+        // Check if the metrics are valid.
+        // Since metrics are dummy data, we will consider anything 0 or above to be OK here.
+        assert!(metrics.accuracy >= 0.0);
+        assert!(metrics.precision >= 0.0);
+        assert!(metrics.recall >= 0.0);
+        Ok(())
+    }
+}

From cbe9a6d3601c5f3625b787ce9b62772e0d69593a Mon Sep 17 00:00:00 2001
From: Minoru Osuka <minoru.osuka@gmail.com>
Date: Wed, 4 Jun 2025 22:26:14 +0900
Subject: [PATCH 13/15] Add tests

---
 src/adaboost.rs  | 165 +++++++++++++++++++++++++++++++++++++++++++++++
 src/segmenter.rs |   4 +-
 src/trainer.rs   |   5 +-
 3 files changed, 171 insertions(+), 3 deletions(-)

diff --git a/src/adaboost.rs b/src/adaboost.rs
index 2272343..7dd9cb9 100644
--- a/src/adaboost.rs
+++ b/src/adaboost.rs
@@ -407,3 +407,168 @@ impl AdaBoost {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use std::collections::HashSet;
+    use std::io::Write;
+    use std::sync::atomic::AtomicBool;
+    use std::sync::Arc;
+
+    use tempfile::NamedTempFile;
+
+    #[test]
+    fn test_initialize_features() -> std::io::Result<()> {
+        // Create a dummy features file
+        let mut features_file = NamedTempFile::new()?;
+        writeln!(features_file, "1 feat1 feat2")?;
+        writeln!(features_file, "0 feat3")?;
+        features_file.as_file().sync_all()?;
+
+        let mut learner = AdaBoost::new(0.01, 10, 1);
+        learner.initialize_features(features_file.path())?;
+
+        // `features` is an ordered collection that should contain "" (the empty string), "feat1", "feat2" and "feat3"
+        assert!(learner.features.contains(&"".to_string()));
+        assert!(learner.features.contains(&"feat1".to_string()));
+        assert!(learner.features.contains(&"feat2".to_string()));
+        assert!(learner.features.contains(&"feat3".to_string()));
+        Ok(())
+    }
+
+    #[test]
+    fn test_initialize_instances() -> std::io::Result<()> {
+        // First, initialize the features from a features file.
+        let mut features_file = NamedTempFile::new()?;
+        writeln!(features_file, "1 feat1 feat2")?;
+        features_file.as_file().sync_all()?;
+
+        let mut learner = AdaBoost::new(0.01, 10, 1);
+        learner.initialize_features(features_file.path())?;
+
+        // Create a dummy instance file
+        let mut instance_file = NamedTempFile::new()?;
+        // Example: "1 feat1" line. The learner will consider feat1 as a candidate if found by binary_search.
+        writeln!(instance_file, "1 feat1")?;
+        instance_file.as_file().sync_all()?;
+
+        learner.initialize_instances(instance_file.path())?;
+
+        // The number of instances should be 1, and the instance_weights, labels, and instances should be updated accordingly.
+        assert_eq!(learner.num_instances, 1);
+        assert_eq!(learner.labels.len(), 1);
+        assert_eq!(learner.instance_weights.len(), 1);
+        assert_eq!(learner.instances.len(), 1);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_train() -> std::io::Result<()> {
+        // Initialize features using a features file.
+        let mut features_file = NamedTempFile::new()?;
+        writeln!(features_file, "1 feat1 feat2")?;
+        features_file.as_file().sync_all()?;
+
+        let mut learner = AdaBoost::new(0.01, 3, 1);
+        learner.initialize_features(features_file.path())?;
+
+        // Create a dummy instance file with one instance.
+        let mut instance_file = NamedTempFile::new()?;
+        writeln!(instance_file, "1 feat1")?;
+        instance_file.as_file().sync_all()?;
+        learner.initialize_instances(instance_file.path())?;
+
+        // Set running to false to immediately exit the learning loop.
+        let running = Arc::new(AtomicBool::new(false));
+        learner.train(running.clone());
+
+        // After training, the instance weights are expected to be normalized, so their sum is checked below.
+        let weight_sum: f64 = learner.instance_weights.iter().sum();
+
+        // weight_sum should be normalized to 1.0.
+        assert!((weight_sum - 1.0).abs() < 1e-6);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_save_and_load_model() -> std::io::Result<()> {
+        // Prepare a dummy learner.
+        let mut learner = AdaBoost::new(0.01, 10, 1);
+
+        // Set the features and weights in advance.
+        learner.features = vec!["feat1".to_string(), "feat2".to_string()];
+        learner.model = vec![0.5, -0.3];
+
+        // Save the model to a temporary file.
+        let temp_model = NamedTempFile::new()?;
+        learner.save_model(temp_model.path())?;
+
+        // Load the model with a new learner.
+        let mut learner2 = AdaBoost::new(0.01, 10, 1);
+        learner2.load_model(temp_model.path())?;
+
+        // Check that the number of features and models match.
+        assert_eq!(learner2.features.len(), learner.features.len());
+        assert_eq!(learner2.model.len(), learner.model.len());
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_add_instance_and_predict() {
+        let mut learner = AdaBoost::new(0.01, 10, 1);
+
+        // Here, features and model are empty in the initial state. They are newly registered by add_instance.
+        let mut attrs = HashSet::new();
+        attrs.insert("A".to_string());
+        learner.add_instance(attrs.clone(), 1);
+
+        // When the same attribute is passed to predict, it should return 1: the score is based on the initial model value (0.0), and a score >= 0 yields 1.
+        let prediction = learner.predict(attrs);
+        assert_eq!(prediction, 1);
+    }
+
+    #[test]
+    fn test_get_bias() {
+        let mut learner = AdaBoost::new(0.01, 10, 1);
+
+        // Set model weights as an example.
+        learner.model = vec![0.2, 0.3, -0.1];
+
+        // bias = -sum(model)/2 = -(0.2+0.3-0.1)/2 = -0.4/2 = -0.2
+        assert!((learner.get_bias() + 0.2).abs() < 1e-6);
+    }
+
+    #[test]
+    fn test_get_metrics() {
+        let mut learner = AdaBoost::new(0.01, 10, 1);
+
+        // Set features and model for prediction
+        learner.features = vec!["A".to_string(), "B".to_string()];
+        learner.model = vec![0.5, -1.0];
+
+        // Instance 1: attribute "A" → score = 0.25 (bias, i.e. -sum(model)/2) + 0.5 = 0.75 (positive example)
+        let mut attrs1 = HashSet::new();
+        attrs1.insert("A".to_string());
+        learner.add_instance(attrs1, 1);
+
+        // Instance 2: attribute "B" → score = 0.25 (bias, i.e. -sum(model)/2) + (-1.0) = -0.75 (negative example)
+        let mut attrs2 = HashSet::new();
+        attrs2.insert("B".to_string());
+        learner.add_instance(attrs2, -1);
+
+        let metrics = learner.get_metrics();
+        assert_eq!(metrics.true_positives, 1);
+        assert_eq!(metrics.true_negatives, 1);
+        assert_eq!(metrics.false_positives, 0);
+        assert_eq!(metrics.false_negatives, 0);
+        assert_eq!(metrics.num_instances, 2);
+
+        // Since this is a simple case, the accuracy is 100%.
+        assert!((metrics.accuracy - 100.0).abs() < 1e-6);
+    }
+}
diff --git a/src/segmenter.rs b/src/segmenter.rs
index fb5af4a..7b69ecb 100644
--- a/src/segmenter.rs
+++ b/src/segmenter.rs
@@ -286,10 +286,10 @@ impl Segmenter {
 
 #[cfg(test)]
 mod tests {
-    use std::path::PathBuf;
-
     use super::*;
 
+    use std::path::PathBuf;
+
     #[test]
     fn test_add_sentence_with_writer() {
         let mut segmenter = Segmenter::new(None);
diff --git a/src/trainer.rs b/src/trainer.rs
index 4750dea..280e38b 100644
--- a/src/trainer.rs
+++ b/src/trainer.rs
@@ -87,12 +87,15 @@ impl Trainer {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::adaboost::Metrics;
+
     use std::io::Write;
     use std::sync::atomic::AtomicBool;
     use std::sync::Arc;
+
     use tempfile::NamedTempFile;
 
+    use crate::adaboost::Metrics;
+
     // Helper: create a dummy features file.
     // This file should contain at least one line for initialize_features and initialize_instances.
     fn create_dummy_features_file() -> NamedTempFile {

From f555d1710843f1f4ef9a35b46c46ae7ea3e7942d Mon Sep 17 00:00:00 2001
From: Minoru Osuka <minoru.osuka@gmail.com>
Date: Wed, 4 Jun 2025 22:28:51 +0900
Subject: [PATCH 14/15] Update README.md

---
 README.md | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 727f0ed..6d0ad4d 100644
--- a/README.md
+++ b/README.md
@@ -80,14 +80,17 @@ The output from the `train` command is similar to:
 
 ```text
 finding instances...: 61 instances found
-
+loading instances...: 61/61 instances loaded
 Iteration 9999 - margin: 0.16068839956263622
-Result:
-Accuracy: 100.00% (61 / 61)
-Precision: 100.00% (24 / 24)
-Recall: 100.00% (24 / 24)
-Confusion Matrix: TP: 24, FP: 0, FN: 0, TN: 37
-Training completed successfully.
+Result Metrics:
+  Accuracy: 100.00% ( 61 / 61 )
+  Precision: 100.00% ( 24 / 24 )
+  Recall: 100.00% ( 24 / 24 )
+  Confusion Matrix:
+    True Positives: 24
+    False Positives: 0
+    False Negatives: 0
+    True Negatives: 37
 ```
 
 ## How to segment sentences into words

From f1f1373f3775cb21172d8c978da74ce4cb613903 Mon Sep 17 00:00:00 2001
From: Minoru Osuka <minoru.osuka@gmail.com>
Date: Wed, 4 Jun 2025 22:29:37 +0900
Subject: [PATCH 15/15] Bump up version to 0.2.0

---
 Cargo.lock | 2 +-
 Cargo.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 4594d15..16406bb 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -226,7 +226,7 @@ checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
 
 [[package]]
 name = "litsea"
-version = "0.1.0"
+version = "0.2.0"
 dependencies = [
  "clap",
  "ctrlc",
diff --git a/Cargo.toml b/Cargo.toml
index 1083139..f4e8db2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "litsea"
-version = "0.1.0"
+version = "0.2.0"
 edition = "2021"
 description = "Litsea is an extreamely compact word segmentation and model training tool implemented in Rust."
 documentation = "https://docs.rs/litsea"