From ee7861846f1cbece30a33f8ab511719d32fde578 Mon Sep 17 00:00:00 2001 From: PgBiel <9021226+PgBiel@users.noreply.github.com> Date: Tue, 3 Jun 2025 18:39:56 -0300 Subject: [PATCH 1/9] Fix decimal scientific notation (#24) --- oxifmt.typ | 10 +--------- tests/strfmt-tests.typ | 3 +++ 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/oxifmt.typ b/oxifmt.typ index c12882e..cd774a8 100644 --- a/oxifmt.typ +++ b/oxifmt.typ @@ -276,15 +276,7 @@ // Normalize decimals with larger scales than is needed let fractional = fractional.sum(default: "").trim("0", at: end) let (integral, fractional, exponent) = if num > -1 and num < 1 and fractional != "" { - let first-non-zero = fractional.position("1") - if first-non-zero == none { first-non-zero = fractional.position("2") } - if first-non-zero == none { first-non-zero = fractional.position("3") } - if first-non-zero == none { first-non-zero = fractional.position("4") } - if first-non-zero == none { first-non-zero = fractional.position("5") } - if first-non-zero == none { first-non-zero = fractional.position("6") } - if first-non-zero == none { first-non-zero = fractional.position("7") } - if first-non-zero == none { first-non-zero = fractional.position("8") } - if first-non-zero == none { first-non-zero = fractional.position("9") } + let first-non-zero = fractional.codepoints().position(s => s != "0") assert(first-non-zero != none, message: "String formatter internal error: expected non-zero fractional digit") // Integral part is zero diff --git a/tests/strfmt-tests.typ b/tests/strfmt-tests.typ index 25c5114..a2065fb 100644 --- a/tests/strfmt-tests.typ +++ b/tests/strfmt-tests.typ @@ -132,6 +132,9 @@ assert.eq(strfmt("{:e}", decimal("132423")), "1.32423e5") assert.eq(strfmt("{:e}", decimal("-132423")), "-1.32423e5") assert.eq(strfmt("{:011.5}", decimal("1234.5")), "01234.50000") + + // Issue #23: decimal exponential + assert.eq(strfmt("{:e}", decimal("0.31")), "3.1e-1") } // Issue #16: large numbers #{ From 
6b873871ae5e3844d24e101bbbbbcd6314c06831 Mon Sep 17 00:00:00 2001 From: PgBiel <9021226+PgBiel@users.noreply.github.com> Date: Tue, 3 Jun 2025 19:14:13 -0300 Subject: [PATCH 2/9] Close format specifiers eagerly on parsing (#22) * close format specifiers eagerly Plus error on stray } at the end * remove unnecessary branch * change rbracket check --- oxifmt.typ | 65 ++++++++++++++++++------------------------ tests/strfmt-tests.typ | 10 ++++++- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/oxifmt.typ b/oxifmt.typ index cd774a8..66d86f5 100644 --- a/oxifmt.typ +++ b/oxifmt.typ @@ -89,62 +89,50 @@ last-was-lbracket = false // escape {{ last-was-rbracket = false if current-fmt-span.at(0) == last-i { + // outside a span ({...} {{ <-) => emit an 'escaped' token current-fmt-span = none // cancel this span current-fmt-name = none - } - if current-fmt-name != none { - // if in the middle of a larger span ({ ... {{ <-): - // add the escaped character to the format name - current-fmt-name += character - } else { - // outside a span ({...} {{ <-) => emit an 'escaped' token result.push((escape: (escaped: "{", span: (last-i, i + 1)))) + } else { + panic("String formatter error: internal error: invalid left bracket state") } - - last-i = i - i += 1 // '{' is ASCII, so 1 byte - continue - } - if last-was-rbracket { - // { ... }{ <--- ok, close the previous span - (result, current-fmt-span, current-fmt-name) = write-format-span(last-i, result, current-fmt-span, current-fmt-name) - last-was-rbracket = false - } - if current-fmt-span == none { + } else if current-fmt-span == none { // begin span current-fmt-span = (i, none) current-fmt-name = "" + + // indicate we just started a span + // in case it is escaped right afterwards + last-was-lbracket = true + } else { + // if in the middle of a larger span ({ ... 
{ <-): + // error + excessive-lbracket() } - last-was-lbracket = true } else if character == "}" { last-was-lbracket = false - if last-was-rbracket { - last-was-rbracket = false // escape }} - if current-fmt-name != none { - current-fmt-name += character - } else { + if current-fmt-span == none { + if last-was-rbracket { + last-was-rbracket = false // escape }} result.push((escape: (escaped: "}", span: (last-i, i + 1)))) + } else { + // delay erroring on unmatched } to the next iteration + // in case this is an escaped } + last-was-rbracket = true } - - last-i = i - i += 1 // '}' is ASCII, so 1 byte - continue + } else { + // { ... } <--- ok, close the previous span + // Do this eagerly, escaping } inside { ... } is invalid + (result, current-fmt-span, current-fmt-name) = write-format-span(i, result, current-fmt-span, current-fmt-name) } - // delay closing the span to the next iteration - // in case this is an escaped } - last-was-rbracket = true } else { - // { ... {A <--- non-escaped { inside larger {} - if last-was-lbracket and (current-fmt-span != none and current-fmt-span.at(0) != last-i) { - excessive-lbracket() - } if last-was-rbracket { if current-fmt-span == none { // {...} }A <--- non-escaped } with no matching { excessive-rbracket() } else { - // { ... }A <--- ok, close the previous span - (result, current-fmt-span, current-fmt-name) = write-format-span(last-i, result, current-fmt-span, current-fmt-name) + // { ... 
}A <--- span should have been eagerly closed already + panic("String formatter error: internal error: invalid right bracket state") } } // {abc <--- add character to the format name @@ -167,6 +155,9 @@ // {abcd| <--- string ended with unclosed span missing-rbracket() } + } else if last-was-rbracket { + // } <--- unmatched and unescaped } at the very end + excessive-rbracket() } result diff --git a/tests/strfmt-tests.typ b/tests/strfmt-tests.typ index a2065fb..fefcc33 100644 --- a/tests/strfmt-tests.typ +++ b/tests/strfmt-tests.typ @@ -11,7 +11,9 @@ assert.eq(strfmt("a{{}}b ={}{}= c{0}d", false, (a: "55", b: 20.3)), "a{}b =false(a: \"55\", b: 20.3)= cfalsed") // test escaping {{ }} from inside { } formats - assert.eq(strfmt("a{b{{b}}b}", ..("b{b}b": 5)), "a5") + // (this is now invalid and should error) + // assert.eq(strfmt("a{b{{b}}b}", ..("b{b}b": 5)), "a5") + // assert.eq(strfmt("a{b}}b}", ..("b}b": 5)), "a5") // test 0 prefix with numbers, but also using 0 as a non-numeric affix assert.eq(strfmt("{:08}|{0:0<8}|{0:0>8}|{0:0^8}", 120), "00000120|12000000|00000120|000120000") @@ -153,6 +155,12 @@ assert.eq(strfmt("{0:e}",1.7976931348623157e+308), "1.7976931348623146e308") assert.eq(strfmt("{0:e}",-1.7976931348623157e+308), "-1.7976931348623146e308") } +// Issue #17: should not escape bracket in format var name +#{ + assert.eq(strfmt("{{{}}}", 1), "{1}") + assert.eq(strfmt("{{"), "{") + assert.eq(strfmt("}}"), "}") +} // DOC TESTS #{ // --- Quick examples --- From a0fa7c9f2c36a046131f50998b3793806cdab99c Mon Sep 17 00:00:00 2001 From: PgBiel <9021226+PgBiel@users.noreply.github.com> Date: Tue, 3 Jun 2025 20:04:51 -0300 Subject: [PATCH 3/9] Variably-sized thousands grouping (#25) --- oxifmt.typ | 68 ++++++++++++++++++++++++++++++++++++------ tests/strfmt-tests.typ | 24 +++++++++++++-- 2 files changed, 80 insertions(+), 12 deletions(-) diff --git a/oxifmt.typ b/oxifmt.typ index 66d86f5..1914544 100644 --- a/oxifmt.typ +++ b/oxifmt.typ @@ -5,6 +5,7 @@ #let _float-type 
= type(5.5) #let _str-type = type("") #let _label-type = type() +#let _arr-type = type(()) #let _minus-sign = "\u{2212}" #let using-080 = type(type(5)) != _str-type @@ -32,6 +33,39 @@ } } +// Splits an array into dynamic chunk sizes. +// 'chunks' is an array e.g. (1, 2, 3) indicating +// the sizes of each chunk. The last size is repeated if there +// are more elements than the chunks combined can cover. +// +// For example, if arr = ("a", "b", "c", "d", "e", "f", "g", "h", "i") and +// chunks = (2, 3), this will return +// (("a", "b"), ("c", "d", "e"), ("f", "g", "h"), ("i",)) +#let _arr-dyn-chunks(arr, chunks) = { + let i = 0 + let res = () + let chunk-i = 0 + if chunks == () { + return () + } + + for element in arr { + if i == 0 { + res.push(()) + i = chunks.at(chunk-i) + if i <= 0 { + assert(false, message: "String formatter error: internal error: received chunk of invalid size") + } + if chunk-i + 1 != chunks.len() { + chunk-i += 1 + } + } + res.last().push(element) + i -= 1 + } + res +} + #let _float-is-nan = if using-0110 { float.is-nan } else { @@ -345,7 +379,12 @@ parameter := argument '$' } let (integral, ..fractional) = string-replacement.split(".") if fmt-thousands-separator != "" and not is-nan and not is-inf { - integral = _arr-chunks(integral.codepoints().rev(), fmt-thousands-count) + let digit-groups = if type(fmt-thousands-count) == _arr-type { + _arr-dyn-chunks(integral.codepoints().rev(), fmt-thousands-count) + } else { + _arr-chunks(integral.codepoints().rev(), fmt-thousands-count) + } + integral = digit-groups .join(fmt-thousands-separator.codepoints().rev()) .rev() .join() @@ -537,7 +576,12 @@ parameter := argument '$' // Format with thousands AFTER zeroes, but BEFORE applying textual prefixes if fmt-thousands-separator != "" and not is-nan and not is-inf { - integral = _arr-chunks(integral.codepoints().rev(), fmt-thousands-count) + let digit-groups = if type(fmt-thousands-count) == _arr-type { + _arr-dyn-chunks(integral.codepoints().rev(), 
fmt-thousands-count) + } else { + _arr-chunks(integral.codepoints().rev(), fmt-thousands-count) + } + integral = digit-groups .join(fmt-thousands-separator.codepoints().rev()) .rev() .join() @@ -606,17 +650,23 @@ parameter := argument '$' ) } - if type(fmt-thousands-count) != _int-type { + if type(fmt-thousands-count) == _arr-type { assert( - false, - message: "String formatter error: 'fmt-thousands-count' must be an integer, got '" + str(type(fmt-thousands-count)) + "' instead." + fmt-thousands-count.all(c => type(c) == _int-type and c > 0), + message: "String formatter error: 'fmt-thousands-count' must be a positive integer or array of positive integers, got an array with at least one element that isn't a positive integer." ) - } - - if fmt-thousands-count <= 0 { + assert(fmt-thousands-count != (), message: "String formatter error: 'fmt-thousands-count' must not be an empty array, but an array of positive integers.") + } else if type(fmt-thousands-count) == _int-type { + if fmt-thousands-count <= 0 { + assert( + false, + message: "String formatter error: 'fmt-thousands-count' must be a positive integer, got " + str(fmt-thousands-count) + " instead." + ) + } + } else { assert( false, - message: "String formatter error: 'fmt-thousands-count' must be a positive integer, got " + str(fmt-thousands-count) + " instead." + message: "String formatter error: 'fmt-thousands-count' must be a positive integer or array of positive integers, got '" + str(type(fmt-thousands-count)) + "' instead." 
) } diff --git a/tests/strfmt-tests.typ b/tests/strfmt-tests.typ index fefcc33..a7a17e8 100644 --- a/tests/strfmt-tests.typ +++ b/tests/strfmt-tests.typ @@ -105,9 +105,27 @@ assert.eq(strfmt("{:#b}", 255, fmt-thousands-count: 1, fmt-thousands-separator: "_"), "0b1_1_1_1_1_1_1_1") assert.eq(strfmt("{:#x}", -16 * 16 * 16 * 16 * 15, fmt-thousands-count: 2, fmt-thousands-separator: "_"), "-0xf_00_00") assert.eq(strfmt("{:o}", -16 * 16 * 16 * 16 * 15, fmt-thousands-count: 4, fmt-thousands-separator: "_"), "-360_0000") - assert.eq(strfmt("{:05}", float("nan"), fmt-thousands-count: 2, fmt-thousands-separator: "_"), "00NaN") - assert.eq(strfmt("{:05}", float("inf"), fmt-thousands-count: 2, fmt-thousands-separator: "_"), "00inf") - assert.eq(strfmt("{:05}", -float("inf"), fmt-thousands-count: 2, fmt-thousands-separator: "_"), "-0inf") + assert.eq(strfmt("{:08}", float("nan"), fmt-thousands-count: 2, fmt-thousands-separator: "_"), "00000NaN") + assert.eq(strfmt("{:08}", float("inf"), fmt-thousands-count: 2, fmt-thousands-separator: "_"), "00000inf") + assert.eq(strfmt("{:08}", -float("inf"), fmt-thousands-count: 2, fmt-thousands-separator: "_"), "-0000inf") + + // Issue #21: variable group sizes + assert.eq(strfmt("{}", 10, fmt-thousands-count: (3, 2), fmt-thousands-separator: "_"), "10") + assert.eq(strfmt("{}", 1000000, fmt-thousands-count: (3, 2), fmt-thousands-separator: ","), "10,00,000") + assert.eq(strfmt("{}", 1000000, fmt-thousands-count: (3, 2, 2, 2, 2, 2, 2, 2), fmt-thousands-separator: ","), "10,00,000") + assert.eq(strfmt("{}", 10000000000, fmt-thousands-count: (3, 2), fmt-thousands-separator: ","), "10,00,00,00,000") + assert.eq(strfmt("{}", 10000000000, fmt-thousands-count: (1, 2, 3), fmt-thousands-separator: ","), "10,000,000,00,0") + assert.eq(strfmt("{}", 10000000.3231, fmt-thousands-count: (1, 2, 3), fmt-thousands-separator: "_"), "10_000_00_0.3231") + assert.eq(strfmt("{}", float("nan"), fmt-thousands-count: (1, 2, 3), fmt-thousands-separator: "_"), 
"NaN") + assert.eq(strfmt("{}", float("inf"), fmt-thousands-count: (1, 2, 3), fmt-thousands-separator: "_"), "inf") + assert.eq(strfmt("{}", -float("inf"), fmt-thousands-count: (1, 2, 3), fmt-thousands-separator: "_"), "-inf") + assert.eq(strfmt("{:014}", -23003, fmt-thousands-count: (4, 4, 2), fmt-thousands-separator: "|"), "-0|00|00|0002|3003") + assert.eq(strfmt("{:#b}", 255, fmt-thousands-count: (1, 2, 1, 2, 1), fmt-thousands-separator: "_"), "0b1_1_11_1_11_1") + assert.eq(strfmt("{:#x}", -16 * 16 * 16 * 16 * 15, fmt-thousands-count: (3, 2), fmt-thousands-separator: "_"), "-0xf0_000") + assert.eq(strfmt("{:o}", -16 * 16 * 16 * 16 * 15, fmt-thousands-count: (4, 2, 1), fmt-thousands-separator: "_"), "-3_60_0000") + assert.eq(strfmt("{:08}", float("nan"), fmt-thousands-count: (2, 3), fmt-thousands-separator: "_"), "00000NaN") + assert.eq(strfmt("{:08}", float("inf"), fmt-thousands-count: (2, 3), fmt-thousands-separator: "_"), "00000inf") + assert.eq(strfmt("{:08}", -float("inf"), fmt-thousands-count: (2, 3), fmt-thousands-separator: "_"), "-0000inf") } // Issue #11: Decimals #if using-0120 { From 11b87a1185f48ab0f6aeeeef33f7e1c8de3e510b Mon Sep 17 00:00:00 2001 From: PgBiel <9021226+PgBiel@users.noreply.github.com> Date: Tue, 3 Jun 2025 20:08:54 -0300 Subject: [PATCH 4/9] New library entrypoint to hide private names (#26) --- lib.typ | 1 + tests/strfmt-tests.typ | 3 ++- typst.toml | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 lib.typ diff --git a/lib.typ b/lib.typ new file mode 100644 index 0000000..d2f7329 --- /dev/null +++ b/lib.typ @@ -0,0 +1 @@ +#import "oxifmt.typ": strfmt diff --git a/tests/strfmt-tests.typ b/tests/strfmt-tests.typ index a7a17e8..e7a37ff 100644 --- a/tests/strfmt-tests.typ +++ b/tests/strfmt-tests.typ @@ -1,4 +1,5 @@ -#import "../oxifmt.typ": strfmt, using-0120 +#import "../lib.typ": strfmt +#import "../oxifmt.typ": using-0120 #{ // test basics (sequential args, named args, pos args) diff --git a/typst.toml 
b/typst.toml index 0174763..e8fe9f0 100644 --- a/typst.toml +++ b/typst.toml @@ -4,5 +4,5 @@ version = "0.3.0" authors = ["PgBiel "] license = "MIT OR Apache-2.0" description = "Convenient Rust-like string formatting in Typst" -entrypoint = "oxifmt.typ" +entrypoint = "lib.typ" repository = "https://github.com/PgBiel/typst-oxifmt" From 48dc0d0a62cd6040cf34cb6b37daf5ba6afb5394 Mon Sep 17 00:00:00 2001 From: PgBiel <9021226+PgBiel@users.noreply.github.com> Date: Wed, 4 Jun 2025 01:29:26 -0300 Subject: [PATCH 5/9] Allow braces for padding (#30) * allow braces for padding * use lookahead this case is so rare, it's not worth it to overcomplicate things * undo the weird state changes * aesthetic changes --- oxifmt.typ | 28 ++++++++++++++++++++++++++-- tests/strfmt-tests.typ | 11 +++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/oxifmt.typ b/oxifmt.typ index 1914544..29c3181 100644 --- a/oxifmt.typ +++ b/oxifmt.typ @@ -88,6 +88,8 @@ // -- parsing state -- let current-fmt-span = none let current-fmt-name = none + // if we're at {abc:|, i.e. right after a colon in {} + let last-was-fmt-colon = false // if the last character was an unescaped { let last-was-lbracket = false // if the last character was an unescaped } @@ -116,6 +118,7 @@ // -- parse loop -- let last-i = none let i = 0 + let code-i = 0 for character in codepoints { if character == "{" { // double l-bracket = escape @@ -138,11 +141,19 @@ // indicate we just started a span // in case it is escaped right afterwards last-was-lbracket = true + } else if last-was-fmt-colon and codepoints.len() > code-i + 1 and codepoints.at(code-i + 1) in ("<", "^", ">") { + // don't error on mid-span { if this { might be used for padding + // 'escape' it right away + // e.g. {a:{<5} => formats "bc" as "bc{{{" + current-fmt-name += character + last-was-lbracket = false } else { // if in the middle of a larger span ({ ... 
{ <-): // error excessive-lbracket() } + + last-was-fmt-colon = false } else if character == "}" { last-was-lbracket = false if current-fmt-span == none { @@ -154,11 +165,18 @@ // in case this is an escaped } last-was-rbracket = true } + } else if last-was-fmt-colon and codepoints.len() > code-i + 1 and codepoints.at(code-i + 1) in ("<", "^", ">") { + // don't close span with } if this } might be used for padding + // e.g. {a:}<5} => formats "bc" as "}}}bc" + current-fmt-name += character + last-was-rbracket = false } else { // { ... } <--- ok, close the previous span // Do this eagerly, escaping } inside { ... } is invalid (result, current-fmt-span, current-fmt-name) = write-format-span(i, result, current-fmt-span, current-fmt-name) } + + last-was-fmt-colon = false } else { if last-was-rbracket { if current-fmt-span == none { @@ -169,9 +187,13 @@ panic("String formatter error: internal error: invalid right bracket state") } } - // {abc <--- add character to the format name - if current-fmt-name != none { + + if current-fmt-name == none { + last-was-fmt-colon = false + } else { + // {abc <--- add character to the format name current-fmt-name += character + last-was-fmt-colon = character == ":" } last-was-lbracket = false last-was-rbracket = false @@ -179,12 +201,14 @@ last-i = i i += character.len() // index must be in bytes, and a UTF-8 codepoint can have more than one byte + code-i += 1 } // { ... if current-fmt-span != none { if last-was-rbracket { // ... 
} <--- ok, close span (result, current-fmt-span, current-fmt-name) = write-format-span(last-i, result, current-fmt-span, current-fmt-name) + last-was-fmt-colon = false } else { // {abcd| <--- string ended with unclosed span missing-rbracket() diff --git a/tests/strfmt-tests.typ b/tests/strfmt-tests.typ index e7a37ff..f9168df 100644 --- a/tests/strfmt-tests.typ +++ b/tests/strfmt-tests.typ @@ -179,6 +179,17 @@ assert.eq(strfmt("{{{}}}", 1), "{1}") assert.eq(strfmt("{{"), "{") assert.eq(strfmt("}}"), "}") + + // Issue #28: pad with {} inside : + assert.eq(strfmt("{:}>4}", "a"), "}}}a") + assert.eq(strfmt("{:}^4}", "a"), "}}a}}") + assert.eq(strfmt("{:}<4}", "a"), "a}}}") + assert.eq(strfmt("{:{>4}", "a"), "{{{a") + assert.eq(strfmt("{:{^4}", "a"), "{{a{{") + assert.eq(strfmt("{:{<4}", "a"), "a{{{") + assert.eq(strfmt("{:{^}", "a"), "a") + assert.eq(strfmt("{:}^}", "a"), "a") + assert.eq(strfmt("{:}}}", "a"), "a}") } // DOC TESTS #{ From e8ca90eccd219c6550af54d091178c283a52f444 Mon Sep 17 00:00:00 2001 From: PgBiel <9021226+PgBiel@users.noreply.github.com> Date: Wed, 4 Jun 2025 01:47:50 -0300 Subject: [PATCH 6/9] Match center align to Rust's (#31) * match center align to Rust's * update readme --- README.md | 4 ++-- oxifmt.typ | 6 ++++-- tests/strfmt-tests.typ | 21 +++++++++++++++++---- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index c52e57f..7c33ade 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ You can use `{:spec}` to customize your output. See the Rust docs linked above f - For strings, `?` (and thus `repr()`) has the effect of printing them with double quotes. For floats, this ensures a `.0` appears after it, even if it doesn't have decimal digits. For integers, this doesn't change anything. Finally, for labels, the `