+
Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/uu/sort/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ self_cell = { workspace = true }
tempfile = { workspace = true }
thiserror = { workspace = true }
unicode-width = { workspace = true }
uucore = { workspace = true, features = ["fs", "parser", "version-cmp"] }
uucore = { workspace = true, features = ["fs", "parser", "version-cmp", "i18n-collator"] }
fluent = { workspace = true }

[target.'cfg(target_os = "linux")'.dependencies]
Expand Down
5 changes: 5 additions & 0 deletions src/uu/sort/locales/en-US.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ sort-after-help = The key format is FIELD[.CHAR][OPTIONS][,FIELD[.CHAR]][OPTIONS

Valid options are: MbdfhnRrV. They override the global options for this key.

Locale-aware sorting:
The LC_ALL, LC_COLLATE, and LANG environment variables affect sorting order.
LC_ALL=C uses fast byte-wise comparison. Other locales use slower but correct Unicode collation.
For performance-critical scenarios with ASCII data, consider using LC_ALL=C.

# Error messages
sort-open-failed = open failed: {$path}: {$error}
sort-parse-key-error = failed to parse key {$key}: {$msg}
Expand Down
5 changes: 5 additions & 0 deletions src/uu/sort/locales/fr-FR.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ sort-after-help = Le format de clé est CHAMP[.CAR][OPTIONS][,CHAMP[.CAR]][OPTIO

Les options valides sont : MbdfhnRrV. Elles remplacent les options globales pour cette clé.

Tri selon la locale :
Les variables d'environnement LC_ALL, LC_COLLATE et LANG affectent l'ordre de tri.
LC_ALL=C utilise une comparaison rapide par octets. D'autres locales utilisent une collation Unicode plus lente mais correcte.
Pour les scénarios où les performances sont critiques avec des données ASCII, envisagez d'utiliser LC_ALL=C.

# Messages d'erreur
sort-open-failed = échec d'ouverture : {$path} : {$error}
sort-parse-key-error = échec d'analyse de la clé {$key} : {$msg}
Expand Down
5 changes: 3 additions & 2 deletions src/uu/sort/src/custom_str_cmp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
//! The goal is to compare strings without transforming them first (i.e. not allocating new strings)

use std::cmp::Ordering;
use uucore::i18n::collator::locale_cmp;

fn filter_char(c: u8, ignore_non_printing: bool, ignore_non_dictionary: bool) -> bool {
if ignore_non_dictionary && !(c.is_ascii_alphanumeric() || c.is_ascii_whitespace()) {
Expand Down Expand Up @@ -35,8 +36,8 @@ pub fn custom_str_cmp(
ignore_case: bool,
) -> Ordering {
if !(ignore_case || ignore_non_dictionary || ignore_non_printing) {
// There are no custom settings. Fall back to the default strcmp, which is faster.
return a.cmp(b);
// There are no custom settings. Fall back to locale-aware comparison.
return locale_cmp(a, b);
}
let mut a_chars = a
.iter()
Expand Down
28 changes: 28 additions & 0 deletions src/uu/sort/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ use uucore::error::{FromIo, strip_errno};
use uucore::error::{UError, UResult, USimpleError, UUsageError};
use uucore::extendedbigdecimal::ExtendedBigDecimal;
use uucore::format_usage;
use uucore::i18n::collator::CollatorOptions;
use uucore::line_ending::LineEnding;
use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError};
use uucore::parser::parse_size::{ParseSizeError, Parser};
Expand Down Expand Up @@ -1037,6 +1038,28 @@ fn get_rlimit() -> UResult<usize> {

const STDIN_FILE: &str = "-";

/// Report whether at least one sort key will go through locale-aware comparison.
///
/// Returns `false` immediately when the collating locale equals
/// `uucore::i18n::DEFAULT_LOCALE`. Otherwise returns `true` as soon as one
/// selector uses `SortMode::Default` with none of `ignore_case`,
/// `ignore_non_printing` or `dictionary_order` set, and `false` if no
/// selector matches.
fn will_need_locale_collation(settings: &GlobalSettings) -> bool {
    // The default locale never needs the collator, whatever the keys look like.
    if uucore::i18n::get_collating_locale().0 == uucore::i18n::DEFAULT_LOCALE {
        return false;
    }

    // A key qualifies only when it is a plain default-mode key with no
    // character-filtering or case-folding flags.
    settings.selectors.iter().any(|selector| {
        let key = &selector.settings;
        key.mode == SortMode::Default
            && !(key.ignore_case || key.ignore_non_printing || key.dictionary_order)
    })
}

#[uucore::main]
#[allow(clippy::cognitive_complexity)]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
Expand Down Expand Up @@ -1318,6 +1341,11 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {

settings.init_precomputed();

// Initialize locale-aware collator only if needed for string comparisons
if will_need_locale_collation(&settings) {
uucore::i18n::collator::try_init_collator(CollatorOptions::default());
}

let result = exec(&mut files, &settings, output, &mut tmp_dir);
// Wait here if `SIGINT` was received,
// for signal handler to do its work and terminate the program.
Expand Down
4 changes: 2 additions & 2 deletions src/uucore/src/lib/features/i18n/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub enum UEncoding {
Utf8,
}

const DEFAULT_LOCALE: Locale = locale!("en-US-posix");
pub const DEFAULT_LOCALE: Locale = locale!("en-US-posix");

/// Look at 3 environment variables in the following order
///
Expand Down Expand Up @@ -64,7 +64,7 @@ fn get_locale_from_env(locale_name: &str) -> (Locale, UEncoding) {
}

/// Get the collating locale from the environment
fn get_collating_locale() -> &'static (Locale, UEncoding) {
pub fn get_collating_locale() -> &'static (Locale, UEncoding) {
static COLLATING_LOCALE: OnceLock<(Locale, UEncoding)> = OnceLock::new();

COLLATING_LOCALE.get_or_init(|| get_locale_from_env("LC_COLLATE"))
Expand Down
20 changes: 20 additions & 0 deletions tests/by-util/test_sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1906,4 +1906,24 @@ fn test_color_environment_variables() {
}
}

#[test]
fn test_locale_sorting_c() {
    // With LC_ALL=C the output is expected in byte-value order, so the
    // multi-byte umlauts stay after the plain ASCII letters.
    let input = "a\no\nu\nä\nö\nü\n";
    let expected = "a\no\nu\nä\nö\nü\n";

    new_ucmd!()
        .env("LC_ALL", "C")
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}

#[test]
fn test_locale_sorting_german() {
    // With LC_ALL=de_DE.utf-8 the output is expected in German collation
    // order, where each umlaut follows directly after its base letter.
    let input = "a\no\nu\nä\nö\nü\n";
    let expected = "a\nä\no\nö\nu\nü\n";

    new_ucmd!()
        .env("LC_ALL", "de_DE.utf-8")
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}

/* spell-checker: enable */
Loading
点击 这是indexloc提供的php浏览器服务,不要输入任何密码和下载