From ab286c7b9a4983c100a003630aaa48ce3e8f33b9 Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 25 Dec 2025 22:22:01 +0900 Subject: [PATCH 01/11] feat(sort): add locale-aware numeric sorting support Implement NumericLocaleSettings to handle thousands separators and decimal points based on locale. Update tokenization logic to accommodate blank thousands separators for numeric and human-numeric modes, improving parsing of locale-specific numbers. Also refactor numeric locale detection for safety/readability and clean up related initialization/spell-checker ignore. --- src/uu/sort/src/sort.rs | 173 +++++++++++++++++++++++++++++++++------- 1 file changed, 146 insertions(+), 27 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 6c6091e92c0..5f7bb8ef24f 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -7,7 +7,7 @@ // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sort.html // https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html -// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim bigdecimal extendedbigdecimal hexdigit behaviour keydef GETFD +// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim bigdecimal extendedbigdecimal hexdigit behaviour keydef GETFD localeconv mod buffer_hint; mod check; @@ -284,9 +284,35 @@ pub struct GlobalSettings { buffer_size_is_explicit: bool, compress_prog: Option, merge_batch_size: usize, + numeric_locale: NumericLocaleSettings, precomputed: Precomputed, } +#[derive(Clone, Copy, Debug)] +struct NumericLocaleSettings { + thousands_sep: Option, + decimal_pt: Option, +} + +impl Default for NumericLocaleSettings { + fn default() -> Self { + Self { + thousands_sep: None, + decimal_pt: Some(DECIMAL_PT), + } + } +} + +impl NumericLocaleSettings { + fn num_info_settings(&self, accept_si_units: bool) -> NumInfoParseSettings { + NumInfoParseSettings { + accept_si_units, + thousands_separator: self.thousands_sep, + decimal_pt: self.decimal_pt, + } + } +} + /// Data needed for sorting. Should be computed once before starting to sort /// by calling `GlobalSettings::init_precomputed`. #[derive(Clone, Debug, Default)] @@ -297,6 +323,8 @@ struct Precomputed { selections_per_line: usize, fast_lexicographic: bool, fast_ascii_insensitive: bool, + tokenize_blank_thousands_sep: bool, + tokenize_allow_unit_after_blank: bool, } impl GlobalSettings { @@ -341,6 +369,20 @@ impl GlobalSettings { .filter(|s| matches!(s.settings.mode, SortMode::GeneralNumeric)) .count(); + let uses_numeric = self + .selectors + .iter() + .any(|s| matches!(s.settings.mode, SortMode::Numeric | SortMode::HumanNumeric)); + let uses_human_numeric = self + .selectors + .iter() + .any(|s| matches!(s.settings.mode, SortMode::HumanNumeric)); + self.precomputed.tokenize_blank_thousands_sep = self.separator.is_none() + && uses_numeric + && self.numeric_locale.thousands_sep == Some(b' '); + self.precomputed.tokenize_allow_unit_after_blank = + self.precomputed.tokenize_blank_thousands_sep && uses_human_numeric; + self.precomputed.fast_lexicographic = !disable_fast_lexicographic && self.can_use_fast_lexicographic(); self.precomputed.fast_ascii_insensitive = self.can_use_fast_ascii_insensitive(); @@ -413,6 +455,7 @@ impl Default for GlobalSettings { buffer_size_is_explicit: false, compress_prog: None, merge_batch_size: default_merge_batch_size(), + numeric_locale: NumericLocaleSettings::default(), precomputed: Precomputed::default(), } } @@ -597,7 +640,12 @@ impl<'a> Line<'a> { } token_buffer.clear(); if settings.precomputed.needs_tokens { - tokenize(line, settings.separator, token_buffer); + tokenize( + line, + settings.separator, + token_buffer, + &settings.precomputed, + ); } if settings.mode == SortMode::Numeric { // exclude inf, nan, scientific notation @@ -607,11 +655,12 @@ impl<'a> Line<'a> { .and_then(|s| s.parse::().ok()); line_data.line_num_floats.push(line_num_float); } - for (selector, selection) in settings - .selectors - .iter() - .map(|selector| (selector, selector.get_selection(line, token_buffer))) - { + for (selector, selection) in settings.selectors.iter().map(|selector| { + ( + selector, + selector.get_selection(line, token_buffer, &settings.numeric_locale), + ) + }) { match selection { Selection::AsBigDecimal(parsed_float) => line_data.parsed_floats.push(parsed_float), Selection::WithNumInfo(str, num_info) => { @@ -660,7 +709,12 @@ impl<'a> Line<'a> { writeln!(writer)?; let mut fields = vec![]; - tokenize(self.line, settings.separator, &mut fields); + tokenize( + self.line, + settings.separator, + &mut fields, + &settings.precomputed, + ); for selector in &settings.selectors { let mut selection = selector.get_range(self.line, Some(&fields)); match selector.settings.mode { @@ -668,10 +722,9 @@ impl<'a> Line<'a> { // find out which range is used for numeric comparisons let (_, num_range) = NumInfo::parse( &self.line[selection.clone()], - &NumInfoParseSettings { - accept_si_units: selector.settings.mode == SortMode::HumanNumeric, - ..Default::default() - }, + &settings + .numeric_locale + .num_info_settings(selector.settings.mode == SortMode::HumanNumeric), ); let initial_selection = selection.clone(); @@ -789,24 +842,50 @@ impl<'a> Line<'a> { } /// Tokenize a line into fields. The result is stored into `token_buffer`. -fn tokenize(line: &[u8], separator: Option, token_buffer: &mut Vec) { +fn tokenize( + line: &[u8], + separator: Option, + token_buffer: &mut Vec, + precomputed: &Precomputed, +) { assert!(token_buffer.is_empty()); if let Some(separator) = separator { tokenize_with_separator(line, separator, token_buffer); } else { - tokenize_default(line, token_buffer); + tokenize_default( + line, + token_buffer, + precomputed.tokenize_blank_thousands_sep, + precomputed.tokenize_allow_unit_after_blank, + ); } } /// By default fields are separated by the first whitespace after non-whitespace. /// Whitespace is included in fields at the start. /// The result is stored into `token_buffer`. -fn tokenize_default(line: &[u8], token_buffer: &mut Vec) { +fn tokenize_default( + line: &[u8], + token_buffer: &mut Vec, + blank_thousands_sep: bool, + allow_unit_after_blank: bool, +) { token_buffer.push(0..0); // pretend that there was whitespace in front of the line let mut previous_was_whitespace = true; for (idx, char) in line.iter().enumerate() { - if char.is_ascii_whitespace() { + let is_whitespace = char.is_ascii_whitespace(); + let treat_as_separator = if is_whitespace { + if blank_thousands_sep && *char == b' ' { + !is_blank_thousands_sep(line, idx, allow_unit_after_blank) + } else { + true + } + } else { + false + }; + + if treat_as_separator { if !previous_was_whitespace { token_buffer.last_mut().unwrap().end = idx; token_buffer.push(idx..0); @@ -819,6 +898,31 @@ fn tokenize_default(line: &[u8], token_buffer: &mut Vec) { token_buffer.last_mut().unwrap().end = line.len(); } +fn is_blank_thousands_sep(line: &[u8], idx: usize, allow_unit_after_blank: bool) -> bool { + if line.get(idx) != Some(&b' ') { + return false; + } + + let prev_is_digit = idx + .checked_sub(1) + .and_then(|prev_idx| line.get(prev_idx)) + .is_some_and(u8::is_ascii_digit); + if !prev_is_digit { + return false; + } + + let next = line.get(idx + 1).copied(); + match next { + Some(c) if c.is_ascii_digit() => true, + Some(b'K' | b'k' | b'M' | b'G' | b'T' | b'P' | b'E' | b'Z' | b'Y' | b'R' | b'Q') + if allow_unit_after_blank => + { + true + } + _ => false, + } +} + /// Split between separators. These separators are not included in fields. /// The result is stored into `token_buffer`. fn tokenize_with_separator(line: &[u8], separator: u8, token_buffer: &mut Vec) { @@ -1077,7 +1181,12 @@ impl FieldSelector { /// Get the selection that corresponds to this selector for the line. /// If `needs_fields` returned false, tokens may be empty. - fn get_selection<'a>(&self, line: &'a [u8], tokens: &[Field]) -> Selection<'a> { + fn get_selection<'a>( + &self, + line: &'a [u8], + tokens: &[Field], + numeric_locale: &NumericLocaleSettings, + ) -> Selection<'a> { // `get_range` expects `None` when we don't need tokens and would get confused by an empty vector. let tokens = if self.needs_tokens { Some(tokens) @@ -1097,14 +1206,10 @@ impl FieldSelector { }; // Parse NumInfo for this number. - let (info, num_range) = NumInfo::parse( - range_str, - &NumInfoParseSettings { - accept_si_units: self.settings.mode == SortMode::HumanNumeric, - thousands_separator, - ..Default::default() - }, - ); + let mut parse_settings = + numeric_locale.num_info_settings(self.settings.mode == SortMode::HumanNumeric); + parse_settings.thousands_separator = thousands_separator; + let (info, num_range) = NumInfo::parse(range_str, &parse_settings); // Shorten the range to what we need to pass to numeric_str_cmp later. range_str = &range_str[num_range]; Selection::WithNumInfo(range_str, info) @@ -1216,6 +1321,16 @@ impl FieldSelector { } } +fn detect_numeric_locale() -> NumericLocaleSettings { + let mut settings = NumericLocaleSettings::default(); + settings.decimal_pt = Some(locale_decimal_pt()); + settings.thousands_sep = match i18n::decimal::locale_grouping_separator().as_bytes() { + [b] => Some(*b), + _ => None, + }; + settings +} + /// Creates an `Arg` for a sort mode flag. fn make_sort_mode_arg(mode: &'static str, short: char, help: String) -> Arg { Arg::new(mode) @@ -1847,7 +1962,10 @@ fn emit_debug_warnings( #[uucore::main] #[allow(clippy::cognitive_complexity)] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let mut settings = GlobalSettings::default(); + let mut settings = GlobalSettings { + numeric_locale: detect_numeric_locale(), + ..Default::default() + }; let (processed_args, mut legacy_warnings) = preprocess_legacy_args(args); if !legacy_warnings.is_empty() { @@ -2965,7 +3083,8 @@ mod tests { fn tokenize_helper(line: &[u8], separator: Option) -> Vec { let mut buffer = vec![]; - tokenize(line, separator, &mut buffer); + let precomputed = Precomputed::default(); + tokenize(line, separator, &mut buffer, &precomputed); buffer } From 86dc5830c45ec9a12f74166806c41719a90590fc Mon Sep 17 00:00:00 2001 From: mattsu Date: Sat, 17 Jan 2026 21:35:20 +0900 Subject: [PATCH 02/11] test: add test for human-numeric sort with blank thousands separator in sv_SE locale Add a new test function `test_human_numeric_blank_thousands_sep_locale` to verify that the sort utility correctly handles human-readable numeric sorting when the locale's thousands separator is a blank space (e.g., in sv_SE.UTF-8 or sv_SE). This ensures proper behavior of the `-h` flag with key-based sorting in such locales, preventing potential sorting errors with space-separated numeric strings. --- tests/by-util/test_sort.rs | 65 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index bc2092b8db2..faaec3fd495 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -8,6 +8,8 @@ use std::env; use std::fmt::Write as FmtWrite; +#[cfg(unix)] +use std::process::Command; use std::time::Duration; use uutests::at_and_ucmd; @@ -1693,6 +1695,69 @@ fn test_g_float_locale_decimal_separator() { .stdout_is("1.10\n1.9\n"); } +#[test] +#[cfg(unix)] +fn test_human_numeric_blank_thousands_sep_locale() { + fn thousands_sep_for(locale: &str) -> Option { + let output = Command::new("locale") + .arg("thousands_sep") + .env("LC_ALL", locale) + .output() + .ok()?; + if !output.status.success() { + return None; + } + let sep = String::from_utf8_lossy(&output.stdout); + let sep = sep.trim_end_matches(|ch| ch == '\n' || ch == '\r'); + if sep.is_empty() || sep.as_bytes().len() != 1 || !sep.chars().all(|c| c.is_whitespace()) { + return None; + } + Some(sep.to_string()) + } + + let candidates = ["sv_SE.UTF-8", "sv_SE"]; + let mut selected_locale = None; + let mut thousands_sep = None; + for candidate in candidates { + if let Some(sep) = thousands_sep_for(candidate) { + selected_locale = Some(candidate.to_string()); + thousands_sep = Some(sep); + break; + } + } + + let (Some(locale), Some(sep)) = (selected_locale, thousands_sep) else { + return; + }; + + let line1 = format!("1 1k 1 M 4{sep}003 1M"); + let line2 = format!("2k 2M 2 k 4{sep}002 2"); + let line3 = format!("3M 3 3 G 4{sep}001 3k"); + let input = format!("{line1}\n{line2}\n{line3}\n"); + + let ts = TestScenario::new("sort"); + ts.fixtures.write("blank-thousands.txt", &input); + + let cases = [ + (1, format!("{line1}\n{line2}\n{line3}\n")), + (2, format!("{line3}\n{line1}\n{line2}\n")), + (3, format!("{line1}\n{line2}\n{line3}\n")), + (5, format!("{line3}\n{line2}\n{line1}\n")), + ]; + + for (key, expected) in cases { + let key_str = key.to_string(); + ts.ucmd() + .env("LC_ALL", &locale) + .arg("-h") + .arg("-k") + .arg(&key_str) + .arg("blank-thousands.txt") + .succeeds() + .stdout_is(expected); + } +} + #[test] // Test misc numbers ("'a" is not interpreted as literal, trailing text is ignored...) fn test_g_misc() { From 80f49fb353b46d441c8818e8518123e926ba0df4 Mon Sep 17 00:00:00 2001 From: mattsu Date: Sat, 17 Jan 2026 21:40:09 +0900 Subject: [PATCH 03/11] refactor: simplify separator trimming in locale test Use array slice for trim_end_matches and String::len for length check to improve readability and efficiency in test_human_numeric_blank_thousands_sep_locale. --- tests/by-util/test_sort.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index faaec3fd495..f48308de0db 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -1708,8 +1708,8 @@ fn test_human_numeric_blank_thousands_sep_locale() { return None; } let sep = String::from_utf8_lossy(&output.stdout); - let sep = sep.trim_end_matches(|ch| ch == '\n' || ch == '\r'); - if sep.is_empty() || sep.as_bytes().len() != 1 || !sep.chars().all(|c| c.is_whitespace()) { + let sep = sep.trim_end_matches(&['\n', '\r'][..]); + if sep.is_empty() || sep.len() != 1 || !sep.chars().all(|c| c.is_whitespace()) { return None; } Some(sep.to_string()) From 39ed771fd1159e90d26ef865098cf35aa263b804 Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 25 Dec 2025 22:22:01 +0900 Subject: [PATCH 04/11] feat(sort): add locale-aware numeric sorting support Implement NumericLocaleSettings to handle thousands separators and decimal points based on locale. Update tokenization logic to accommodate blank thousands separators for numeric and human-numeric modes, improving parsing of locale-specific numbers. Also refactor numeric locale detection for safety/readability and clean up related initialization/spell-checker ignore. --- src/uu/sort/src/sort.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 5f7bb8ef24f..51cdb862b53 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1330,7 +1330,6 @@ fn detect_numeric_locale() -> NumericLocaleSettings { }; settings } - /// Creates an `Arg` for a sort mode flag. fn make_sort_mode_arg(mode: &'static str, short: char, help: String) -> Arg { Arg::new(mode) From dd720344ce8f78946b84fb9dc55874309658d79f Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 22 Jan 2026 08:44:50 +0900 Subject: [PATCH 05/11] refactor(sort): simplify detect_numeric_locale with struct literal Use struct literal initialization instead of creating a mutable default and assigning fields, improving code conciseness and readability without changing functionality. --- src/uu/sort/src/sort.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 51cdb862b53..b3061f9b23c 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1322,13 +1322,13 @@ impl FieldSelector { } fn detect_numeric_locale() -> NumericLocaleSettings { - let mut settings = NumericLocaleSettings::default(); - settings.decimal_pt = Some(locale_decimal_pt()); - settings.thousands_sep = match i18n::decimal::locale_grouping_separator().as_bytes() { - [b] => Some(*b), - _ => None, - }; - settings + NumericLocaleSettings { + decimal_pt: Some(locale_decimal_pt()), + thousands_sep: match i18n::decimal::locale_grouping_separator().as_bytes() { + [b] => Some(*b), + _ => None, + }, + } } /// Creates an `Arg` for a sort mode flag. fn make_sort_mode_arg(mode: &'static str, short: char, help: String) -> Arg { From d7be9657cc77dae3848bb98ac8972ff9b21e5293 Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 22 Jan 2026 09:11:11 +0900 Subject: [PATCH 06/11] refactor(sort): improve thousands separator handling in numeric sorting - Ignore thousands separators in debug annotations to match GNU output - Simplify NumInfo parsing by removing redundant thousands separator logic - Enhance detection of numeric locale settings to handle multibyte separators like NBSP correctly, maintaining single-byte behavior for compatibility with upstream GNU coreutils --- src/uu/sort/src/sort.rs | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index b3061f9b23c..cf9073f52fe 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -720,12 +720,12 @@ impl<'a> Line<'a> { match selector.settings.mode { SortMode::Numeric | SortMode::HumanNumeric => { // find out which range is used for numeric comparisons - let (_, num_range) = NumInfo::parse( - &self.line[selection.clone()], - &settings - .numeric_locale - .num_info_settings(selector.settings.mode == SortMode::HumanNumeric), - ); + let mut parse_settings = settings + .numeric_locale + .num_info_settings(selector.settings.mode == SortMode::HumanNumeric); + // Debug annotations should ignore thousands separators to match GNU output. + parse_settings.thousands_separator = None; + let (_, num_range) = NumInfo::parse(&self.line[selection.clone()], &parse_settings); let initial_selection = selection.clone(); // Shorten selection to num_range. @@ -1195,21 +1195,11 @@ impl FieldSelector { }; let mut range_str = &line[self.get_range(line, tokens)]; if self.settings.mode == SortMode::Numeric || self.settings.mode == SortMode::HumanNumeric { - // Get the thousands separator from the locale, handling cases where the separator is empty or multi-character - let locale_thousands_separator = i18n::decimal::locale_grouping_separator().as_bytes(); - - // Upstream GNU coreutils ignore multibyte thousands separators - // (FIXME in C source). We keep the same single-byte behavior. - let thousands_separator = match locale_thousands_separator { - [b] => Some(*b), - _ => None, - }; - // Parse NumInfo for this number. - let mut parse_settings = - numeric_locale.num_info_settings(self.settings.mode == SortMode::HumanNumeric); - parse_settings.thousands_separator = thousands_separator; - let (info, num_range) = NumInfo::parse(range_str, &parse_settings); + let (info, num_range) = NumInfo::parse( + range_str, + &numeric_locale.num_info_settings(self.settings.mode == SortMode::HumanNumeric), + ); // Shorten the range to what we need to pass to numeric_str_cmp later. range_str = &range_str[num_range]; Selection::WithNumInfo(range_str, info) @@ -1322,10 +1312,17 @@ impl FieldSelector { } fn detect_numeric_locale() -> NumericLocaleSettings { + let encoding = i18n::get_numeric_locale().1; + let grouping = i18n::decimal::locale_grouping_separator(); NumericLocaleSettings { decimal_pt: Some(locale_decimal_pt()), - thousands_sep: match i18n::decimal::locale_grouping_separator().as_bytes() { + // Upstream GNU coreutils ignore multibyte thousands separators + // (FIXME in C source). We keep the same single-byte behavior. + thousands_sep: match grouping.as_bytes() { [b] => Some(*b), + // ICU returns NBSP as UTF-8 (0xC2 0xA0). In non-UTF8 locales like ISO-8859-1, + // the input byte is 0xA0, so map it to a single-byte separator. + [0xC2, 0xA0] if encoding != i18n::UEncoding::Utf8 => Some(0xA0), _ => None, }, } From f79a86805d5a5b2a8a5f1d38b4623daa58e4fdd7 Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 22 Jan 2026 12:44:52 +0900 Subject: [PATCH 07/11] fix(sort): handle C locale numeric settings correctly - Update detect_numeric_locale to check for C locale (ASCII encoding and "und" locale) - In C locale, set thousands_sep to None to avoid incorrect grouping separators - Adjust test expectations to match new sorting behavior for numeric fields in C locale --- src/uu/sort/src/sort.rs | 13 ++++++++++++- tests/by-util/test_sort.rs | 8 ++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index cf9073f52fe..74b54f13afb 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -1312,7 +1312,18 @@ impl FieldSelector { } fn detect_numeric_locale() -> NumericLocaleSettings { - let encoding = i18n::get_numeric_locale().1; + let numeric_locale = i18n::get_numeric_locale(); + let locale = &numeric_locale.0; + let encoding = numeric_locale.1; + let is_c_locale = encoding == i18n::UEncoding::Ascii && locale.to_string() == "und"; + + if is_c_locale { + return NumericLocaleSettings { + decimal_pt: Some(DECIMAL_PT), + thousands_sep: None, + }; + } + let grouping = i18n::decimal::locale_grouping_separator(); NumericLocaleSettings { decimal_pt: Some(locale_decimal_pt()), diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index f48308de0db..42cbcff5ad4 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -2460,18 +2460,18 @@ _ __ 1 _ -2.4 -___ 2,5 _ -2.,,3 -__ 2.4 ___ +2.,,3 +__ 2.4 ___ 2,,3 _ +2.4 +___ 1a _ 2b From 5e0d83fd156defb4263130e92908179826e41f9b Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 22 Jan 2026 12:46:27 +0900 Subject: [PATCH 08/11] refactor(sort): split long line assignment for improved readability The assignment of NumInfo::parse result was reformatted by splitting it across two lines to enhance code readability and adhere to line length guidelines. --- src/uu/sort/src/sort.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 74b54f13afb..135177b48ba 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -725,7 +725,8 @@ impl<'a> Line<'a> { .num_info_settings(selector.settings.mode == SortMode::HumanNumeric); // Debug annotations should ignore thousands separators to match GNU output. parse_settings.thousands_separator = None; - let (_, num_range) = NumInfo::parse(&self.line[selection.clone()], &parse_settings); + let (_, num_range) = + NumInfo::parse(&self.line[selection.clone()], &parse_settings); let initial_selection = selection.clone(); // Shorten selection to num_range. From b1b2a198e54da4cdee65f9db5f0dff1d0c8075cd Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 22 Jan 2026 13:45:26 +0900 Subject: [PATCH 09/11] i18n: treat C locale as no grouping separator --- src/uucore/src/lib/features/i18n/decimal.rs | 12 ++++++++-- .../mixed_floats_ints_chars_numeric.expected | 4 ++-- ...d_floats_ints_chars_numeric.expected.debug | 12 +++++----- ..._floats_ints_chars_numeric_stable.expected | 4 ++-- ...s_ints_chars_numeric_stable.expected.debug | 8 +++---- ..._floats_ints_chars_numeric_unique.expected | 3 +-- ...s_ints_chars_numeric_unique.expected.debug | 6 ++--- ...ints_chars_numeric_unique_reverse.expected | 3 +-- ...hars_numeric_unique_reverse.expected.debug | 6 ++--- .../sort/multiple_groupings_numeric.expected | 8 +++---- .../multiple_groupings_numeric.expected.debug | 24 +++++++++---------- 11 files changed, 46 insertions(+), 44 deletions(-) diff --git a/src/uucore/src/lib/features/i18n/decimal.rs b/src/uucore/src/lib/features/i18n/decimal.rs index 0a901143c6b..a7ceca2efa3 100644 --- a/src/uucore/src/lib/features/i18n/decimal.rs +++ b/src/uucore/src/lib/features/i18n/decimal.rs @@ -6,7 +6,7 @@ use std::sync::OnceLock; use icu_decimal::provider::DecimalSymbolsV1; -use icu_locale::Locale; +use icu_locale::{Locale, locale}; use icu_provider::prelude::*; use crate::i18n::get_numeric_locale; @@ -60,7 +60,15 @@ fn get_grouping_separator(loc: Locale) -> String { pub fn locale_grouping_separator() -> &'static str { static GROUPING_SEP: OnceLock = OnceLock::new(); - GROUPING_SEP.get_or_init(|| get_grouping_separator(get_numeric_locale().0.clone())) + GROUPING_SEP.get_or_init(|| { + let loc = get_numeric_locale().0.clone(); + // C/POSIX locale (represented as "und") has no grouping separator. + if loc == locale!("und") { + String::new() + } else { + get_grouping_separator(loc) + } + }) } #[cfg(test)] diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected index a781a36bba8..59541af3252 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected @@ -21,10 +21,10 @@ CARAvan 8.013 45 46.89 - 4567. - 37800 576,446.88800000 576,446.890 + 4567. + 37800 4798908.340000000000 4798908.45 4798908.8909800 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected.debug b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected.debug index a00067b1ee6..b7b76e58986 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected.debug +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric.expected.debug @@ -67,18 +67,18 @@ __ 46.89 _____ _____ - 4567. - _____ -____________________ ->>>>37800 - _____ -_________ 576,446.88800000 ___ ________________ 576,446.890 ___ ___________ + 4567. + _____ +____________________ +>>>>37800 + _____ +_________ 4798908.340000000000 ____________________ ____________________ diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected index 36eeda637f7..0ccdd84c059 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected @@ -24,10 +24,10 @@ CARAvan 8.013 45 46.89 +576,446.890 +576,446.88800000 4567. 37800 -576,446.88800000 -576,446.890 4798908.340000000000 4798908.45 4798908.8909800 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected.debug b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected.debug index 3fba8903042..66a98b20879 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected.debug +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_stable.expected.debug @@ -50,14 +50,14 @@ _____ __ 46.89 _____ +576,446.890 +___ +576,446.88800000 +___ 4567. _____ >>>>37800 _____ -576,446.88800000 -___ -576,446.890 -___ 4798908.340000000000 ____________________ 4798908.45 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected index cb27c6664ce..cd4256c5f46 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected @@ -11,10 +11,9 @@ 8.013 45 46.89 +576,446.890 4567. 37800 -576,446.88800000 -576,446.890 4798908.340000000000 4798908.45 4798908.8909800 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected.debug b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected.debug index dd6e8dfcc67..663a4b3a918 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected.debug +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique.expected.debug @@ -24,14 +24,12 @@ _____ __ 46.89 _____ +576,446.890 +___ 4567. _____ >>>>37800 _____ -576,446.88800000 -___ -576,446.890 -___ 4798908.340000000000 ____________________ 4798908.45 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected index bbce169347f..97e261f1452 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected @@ -1,10 +1,9 @@ 4798908.8909800 4798908.45 4798908.340000000000 -576,446.890 -576,446.88800000 37800 4567. +576,446.890 46.89 45 8.013 diff --git a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected.debug b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected.debug index 4b01a840618..01f7abf5bf2 100644 --- a/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected.debug +++ b/tests/fixtures/sort/mixed_floats_ints_chars_numeric_unique_reverse.expected.debug @@ -4,14 +4,12 @@ _______________ __________ 4798908.340000000000 ____________________ -576,446.890 -___ -576,446.88800000 -___ >>>>37800 _____ 4567. _____ +576,446.890 +___ 46.89 _____ 45 diff --git a/tests/fixtures/sort/multiple_groupings_numeric.expected b/tests/fixtures/sort/multiple_groupings_numeric.expected index a6daab83676..9dd5b5f6553 100644 --- a/tests/fixtures/sort/multiple_groupings_numeric.expected +++ b/tests/fixtures/sort/multiple_groupings_numeric.expected @@ -2,14 +2,14 @@ CARAvan + 1,999.99 +1,234 1.234 + 2,000 2.000 2.000,50 +12,34 22 23,. 111 210 -1,234 -12,34 - 1,999.99 - 2,000 diff --git a/tests/fixtures/sort/multiple_groupings_numeric.expected.debug b/tests/fixtures/sort/multiple_groupings_numeric.expected.debug index 57a4ae01b9a..62e98a46a18 100644 --- a/tests/fixtures/sort/multiple_groupings_numeric.expected.debug +++ b/tests/fixtures/sort/multiple_groupings_numeric.expected.debug @@ -10,15 +10,27 @@ CARAvan ^ no match for key _______ +>>1,999.99 + _ +__________ +1,234 +_ +_____ >1.234 _____ ______ +>>>2,000 + _ +________ 2.000 _____ _____ 2.000,50 _____ ________ +12,34 +__ +_____ 22 __ __ @@ -31,15 +43,3 @@ ___ >210 ___ ____ -1,234 -_ -_____ -12,34 -__ -_____ ->>1,999.99 - _ -__________ ->>>2,000 - _ -________ From ce96a263c893228e96b12789a9aa98af6d5d6f08 Mon Sep 17 00:00:00 2001 From: mattsu Date: Thu, 22 Jan 2026 14:54:13 +0900 Subject: [PATCH 10/11] fix(tests): correct sorting order of decimal values in numeric sort fixture Update the expected output for the multiple decimals numeric sort test to reflect the proper ascending order. The values "576,446.88800000" and "576,446.890" were misplaced and have been repositioned to their correct locations in the sorted sequence, ensuring the test accurately validates the sorting logic. The debug fixture was updated accordingly. --- .../fixtures/sort/multiple_decimals_numeric.expected | 4 ++-- .../sort/multiple_decimals_numeric.expected.debug | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/fixtures/sort/multiple_decimals_numeric.expected b/tests/fixtures/sort/multiple_decimals_numeric.expected index 3ef4d22e881..8f42e7ce5da 100644 --- a/tests/fixtures/sort/multiple_decimals_numeric.expected +++ b/tests/fixtures/sort/multiple_decimals_numeric.expected @@ -21,6 +21,8 @@ CARAvan 8.013 45 46.89 +576,446.88800000 +576,446.890 4567..457 4567. 4567.1 @@ -28,8 +30,6 @@ CARAvan 37800 45670.89079.098 45670.89079.1 -576,446.88800000 -576,446.890 4798908.340000000000 4798908.45 4798908.8909800 diff --git a/tests/fixtures/sort/multiple_decimals_numeric.expected.debug b/tests/fixtures/sort/multiple_decimals_numeric.expected.debug index 0ae6d2958a5..948c4869c32 100644 --- a/tests/fixtures/sort/multiple_decimals_numeric.expected.debug +++ b/tests/fixtures/sort/multiple_decimals_numeric.expected.debug @@ -67,6 +67,12 @@ __ 46.89 _____ _____ +576,446.88800000 +___ +________________ +576,446.890 +___ +___________ >>>>>>>>>>4567..457 _____ ___________________ @@ -88,12 +94,6 @@ _____________________ >>>>>>45670.89079.1 ___________ ___________________ -576,446.88800000 -___ -________________ -576,446.890 -___ -___________ 4798908.340000000000 ____________________ ____________________ From 6cde13e75a655d3f9d5d032273fc8a6a08bc7bcf Mon Sep 17 00:00:00 2001 From: mattsu Date: Fri, 23 Jan 2026 08:33:04 +0900 Subject: [PATCH 11/11] fix(sort): skip ordering incompatibility check when --key is specified Previously, the ordering_incompatible check was performed unconditionally, causing errors even when the --key option was used, where such incompatibilities might not apply. This change adds a condition to skip the check if --key is present, ensuring correct behavior for key-based sorting. --- src/uu/sort/src/sort.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 135177b48ba..cbde70a3f1f 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -2081,7 +2081,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { let ignore_non_printing = matches.get_flag(options::IGNORE_NONPRINTING); let ignore_case = matches.get_flag(options::IGNORE_CASE); - if ordering_incompatible(mode_flags, dictionary_order, ignore_non_printing) { + if !matches.contains_id(options::KEY) + && ordering_incompatible(mode_flags, dictionary_order, ignore_non_printing) + { let opts = ordering_opts_string( mode_flags, dictionary_order,