diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index de303fa..5eb1784 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,7 +30,7 @@ jobs: run: cargo fmt --check --all - name: Check Clippy - run: cargo clippy --workspace --all-targets --features wasm,lua54,bench + run: cargo clippy --workspace --all-targets --features wasm,lua54 build: runs-on: ${{ matrix.os }} @@ -76,7 +76,7 @@ jobs: run: cargo build --features wasm - name: Wasm-Pack Test - run: wasm-pack test --node + run: wasm-pack test --node --features wasm build-lua: runs-on: ubuntu-latest diff --git a/Cargo.lock b/Cargo.lock index e2a785d..f12d396 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,9 +19,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.18" +version = "0.6.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" dependencies = [ "anstyle", "anstyle-parse", @@ -34,50 +34,86 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" [[package]] name = "anstyle-parse" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" dependencies = [ "windows-sys", ] [[package]] name = "anstyle-wincon" -version = "3.0.7" +version = "3.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" +checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" dependencies = [ "anstyle", - "once_cell", + "once_cell_polyfill", "windows-sys", ] +[[package]] +name = "anyhow" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" + +[[package]] +name = "ascii-canvas" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1e3e699d84ab1b0911a1010c5c106aa34ae89aeac103be5ce0c3859db1e891" +dependencies = [ + "term", +] + [[package]] name = "autocfg" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" -version = "2.9.0" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" [[package]] name = "block-buffer" @@ -90,9 +126,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.11.3" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "531a9155a481e2ee699d4f98f43c0ca4ff8ee1bfd55c31e9e98fb29d2b176fe0" +checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" dependencies = [ "memchr", "serde", @@ -100,9 +136,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.17.0" +version = "3.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" +checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee" [[package]] name = "cast" @@ -112,24 +148,18 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.16" +version = "1.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c" +checksum = "d487aa071b5f64da6f19a3e848e3578944b726ee5a4854b82172f02aa876bfdc" dependencies = [ "shlex", ] [[package]] name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" - -[[package]] -name = "cfg-if" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "ciborium" @@ -160,9 +190,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.31" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "027bb0d98429ae334a8698531da7077bdf906419543a35a55c2cb1b66437d767" +checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" dependencies = [ "clap_builder", "clap_derive", @@ -170,9 +200,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.31" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5589e0cba072e0f3d23791efac0fd8627b49c829c196a492e88168e6a669d863" +checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" dependencies = [ "anstream", "anstyle", @@ -182,9 +212,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.28" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4ced95c6f4a675af3da73304b9ac4ed991640c36374e4b46795c49e17cf1ed" +checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce" dependencies = [ "heck", "proc-macro2", @@ -194,15 +224,15 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" [[package]] name = "colorchoice" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "colored" @@ -219,7 +249,7 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "wasm-bindgen", ] @@ -247,25 +277,22 @@ dependencies = [ [[package]] name = "criterion" -version = "0.5.1" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +checksum = "3bf7af66b0989381bd0be551bd7cc91912a655a58c6918420c9527b1fd8b4679" dependencies = [ "anes", "cast", "ciborium", "clap", "criterion-plot", - "is-terminal", - "itertools 0.10.5", + "itertools 0.13.0", "num-traits", - "once_cell", "oorandom", "plotters", "rayon", "regex", "serde", - "serde_derive", "serde_json", "tinytemplate", "walkdir", @@ -334,9 +361,18 @@ dependencies = [ [[package]] name = "either" -version = "1.14.0" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "ena" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7914353092ddf589ad78f25c5c1c21b7f80b0ff8621e7c814c3485b5306da9d" +checksum = "3d248bdd43ce613d87415282f69b9bb99d947d290b10962dd6c56233312c2ad5" +dependencies = [ + "log", +] [[package]] name = "equivalent" @@ -354,6 +390,18 @@ dependencies = [ "typeid", ] +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "generic-array" version = "0.14.7" @@ -366,19 +414,19 @@ dependencies = [ [[package]] name = "half" -version = "2.4.1" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crunchy", ] [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" [[package]] name = "heck" @@ -387,33 +435,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] -name = "hermit-abi" -version = "0.4.0" +name = "home" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" +checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +dependencies = [ + "windows-sys", +] [[package]] name = "indexmap" -version = "2.7.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", "hashbrown", "serde", ] -[[package]] -name = "is-terminal" -version = "0.4.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e19b23d53f35ce9f56aebc7d1bb4e6ac1e9c0db7ac85c8d1760c04379edced37" -dependencies = [ - "hermit-abi", - "libc", - "windows-sys", -] - [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -438,6 +478,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.15" @@ -454,24 +503,73 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "keccak" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc2af9a1119c51f12a14607e783cb977bde58bc069ff0c3da1095e635d70654" +dependencies = [ + "cpufeatures", +] + +[[package]] +name = "lalrpop" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba4ebbd48ce411c1d10fb35185f5a51a7bfa3d8b24b4e330d30c9e3a34129501" +dependencies = [ + "ascii-canvas", + "bit-set", + "ena", + "itertools 0.14.0", + "lalrpop-util", + "petgraph", + "pico-args", + "regex", + "regex-syntax", + "sha3", + "string_cache", + "term", + "unicode-xid", + "walkdir", +] + +[[package]] +name = "lalrpop-util" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5baa5e9ff84f1aefd264e6869907646538a52147a755d494517a8007fb48733" +dependencies = [ + "regex-automata", + "rustversion", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" -version = "0.2.170" +version = "0.2.173" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828" +checksum = "d8cfeafaffdbc32176b64fb251369d52ea9f0a8fbc6f8759edffef7b525d64bb" [[package]] name = "libcorn" version = "0.10.0" dependencies = [ - "cfg-if 1.0.0", + "anyhow", "console_error_panic_hook", "criterion", "indexmap", + "itoa", + "lalrpop", + "lalrpop-util", + "logos", "mlua", "paste", - "pest", - "pest_derive", "serde", "serde-wasm-bindgen", "serde_bytes", @@ -481,14 +579,13 @@ dependencies = [ "toml_edit", "wasm-bindgen", "wasm-bindgen-test", - "wee_alloc", ] [[package]] name = "lock_api" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" dependencies = [ "autocfg", "scopeguard", @@ -496,21 +593,49 @@ dependencies = [ [[package]] name = "log" -version = "0.4.26" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] -name = "memchr" -version = "2.7.4" +name = "logos" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab6f536c1af4c7cc81edf73da1f8029896e7e1e16a219ef09b184e76a296f3db" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "189bbfd0b61330abea797e5e9276408f2edbe4f822d7ad08685d67419aafb34e" +dependencies = [ + "beef", + "fnv", + "lazy_static", + "proc-macro2", + "quote", + "regex-syntax", + "rustc_version", + "syn", +] + +[[package]] +name = "logos-derive" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +checksum = "ebfe8e1a19049ddbfccbd14ac834b215e11b85b90bab0c2dba7c7b92fb5d5cba" +dependencies = [ + "logos-codegen", +] [[package]] -name = "memory_units" -version = "0.4.0" +name = "memchr" +version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8452105ba047068f40ff7093dd1d9da90898e63dd61736462e9cdda6a90ad3c3" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" [[package]] name = "minicov" @@ -524,9 +649,9 @@ dependencies = [ [[package]] name = "mlua" -version = "0.10.3" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3f763c1041eff92ffb5d7169968a327e1ed2ebfe425dac0ee5a35f29082534b" +checksum = "c1f5f8fbebc7db5f671671134b9321c4b9aa9adeafccfd9a8c020ae45c6a35d0" dependencies = [ "bstr", "either", @@ -536,18 +661,19 @@ dependencies = [ "num-traits", "parking_lot", "rustc-hash", + "rustversion", "serde", "serde-value", ] [[package]] name = "mlua-sys" -version = "0.6.7" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1901c1a635a22fe9250ffcc4fcc937c16b47c2e9e71adba8784af8bca1f69594" +checksum = "380c1f7e2099cafcf40e51d3a9f20a346977587aa4d012eae1f043149a728a93" dependencies = [ "cc", - "cfg-if 1.0.0", + "cfg-if", "pkg-config", ] @@ -566,6 +692,12 @@ dependencies = [ "syn", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "num-traits" version = "0.2.19" @@ -577,15 +709,21 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.20.3" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" [[package]] name = "oorandom" -version = "11.1.4" +version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" [[package]] name = "ordered-float" @@ -598,9 +736,9 @@ dependencies = [ [[package]] name = "parking_lot" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" dependencies = [ "lock_api", "parking_lot_core", @@ -608,11 +746,11 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.10" +version = "0.9.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "libc", "redox_syscall", "smallvec", @@ -626,49 +764,29 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] -name = "pest" -version = "2.7.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b7cafe60d6cf8e62e1b9b2ea516a089c008945bb5a275416789e7db0bc199dc" -dependencies = [ - "memchr", - "thiserror", - "ucd-trie", -] - -[[package]] -name = "pest_derive" -version = "2.7.15" +name = "petgraph" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "816518421cfc6887a0d62bf441b6ffb4536fcc926395a69e1a85852d4363f57e" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ - "pest", - "pest_generator", + "fixedbitset", + "indexmap", ] [[package]] -name = "pest_generator" -version = "2.7.15" +name = "phf_shared" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d1396fd3a870fc7838768d171b4616d5c91f6cc25e377b673d714567d99377b" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" dependencies = [ - "pest", - "pest_meta", - "proc-macro2", - "quote", - "syn", + "siphasher", ] [[package]] -name = "pest_meta" -version = "2.7.15" +name = "pico-args" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1e58089ea25d717bfd31fb534e4f3afcc2cc569c70de3e239778991ea3b7dea" -dependencies = [ - "once_cell", - "pest", - "sha2", -] +checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" [[package]] name = "pkg-config" @@ -704,6 +822,12 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro-error-attr2" version = "2.0.0" @@ -728,18 +852,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.94" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.39" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1f1914ce909e1658d9907913b4b91947430c7d9be598b15a1912935b8c04801" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] @@ -766,9 +890,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.10" +version = "0.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b8c0c260b63a8219631167be35e6a988e9554dbd323f8bd08439c8ed1302bd1" +checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6" dependencies = [ "bitflags", ] @@ -808,11 +932,20 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustversion" -version = "1.0.20" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" [[package]] name = "ryu" @@ -835,11 +968,17 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "semver" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" + [[package]] name = "serde" -version = "1.0.218" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] @@ -867,18 +1006,18 @@ dependencies = [ [[package]] name = "serde_bytes" -version = "0.11.16" +version = "0.11.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "364fec0df39c49a083c9a8a18a23a6bcfd9af130fe9fe321d18520a0d113e09e" +checksum = "8437fd221bde2d4ca316d61b90e337e9e702b3820b87d63caa9ba6c02bd06d96" dependencies = [ "serde", ] [[package]] name = "serde_derive" -version = "1.0.218" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", @@ -912,22 +1051,21 @@ dependencies = [ [[package]] name = "serde_spanned" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" dependencies = [ "serde", ] [[package]] -name = "sha2" +name = "sha3" version = "0.10.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +checksum = "75872d278a8f37ef87fa0ddbda7802605cb18344497949862c0d4dcb291eba60" dependencies = [ - "cfg-if 1.0.0", - "cpufeatures", "digest", + "keccak", ] [[package]] @@ -936,11 +1074,29 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "smallvec" -version = "1.14.0" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "string_cache" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" +checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared", + "precomputed-hash", +] [[package]] name = "strsim" @@ -950,15 +1106,25 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.99" +version = "2.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e02e925281e18ffd9d640e234264753c43edc62d64b2d4cf898f1bc5e75f3fc2" +checksum = "e4307e30089d6fd6aff212f2da3a1f9e32f3223b1f010fb09b7c95f90f3ca1e8" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] +[[package]] +name = "term" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a984c8d058c627faaf5e8e2ed493fa3c51771889196de1016cf9c1c6e90d750" +dependencies = [ + "home", + "windows-sys", +] + [[package]] name = "thiserror" version = "2.0.12" @@ -991,26 +1157,33 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.8" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.22.24" +version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ "indexmap", "serde", "serde_spanned", "toml_datetime", + "toml_write", "winnow", ] +[[package]] +name = "toml_write" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" + [[package]] name = "typeid" version = "1.0.3" @@ -1023,18 +1196,18 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" -[[package]] -name = "ucd-trie" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" - [[package]] name = "unicode-ident" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "unsafe-libyaml-norway" version = "0.2.15" @@ -1069,7 +1242,7 @@ version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", @@ -1095,7 +1268,7 @@ version = "0.4.50" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "js-sys", "once_cell", "wasm-bindgen", @@ -1168,34 +1341,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "wee_alloc" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb3b5a6b2bb17cb6ad44a2e68a43e8d2722c997da10e928665c72ec6c0a0b8e" -dependencies = [ - "cfg-if 0.1.10", - "libc", - "memory_units", - "winapi", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - [[package]] name = "winapi-util" version = "0.1.9" @@ -1205,12 +1350,6 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-sys" version = "0.59.0" @@ -1286,9 +1425,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.7.3" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e7f4ea97f6f78012141bcdb6a216b2609f0979ada50b20ca5b52dde2eac2bb1" +checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd" dependencies = [ "memchr", ] diff --git a/Cargo.toml b/Cargo.toml index 2725ac9..2f71b4d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,8 +9,11 @@ license = "MIT" description = "Parsing engine for Corn, a simple and pain-free configuration language." repository = "https://github.com/JakeStanger/corn" categories = ["config", "parsing"] -keywords = ["configuration", "language", "wasm", "pest", "peg"] -authors = ["Jake Stanger "] +keywords = ["configuration", "language", "wasm", "peg"] +authors = [ + "Jake Stanger ", + "Matilde Morrone ", +] homepage = "https://cornlang.dev/" documentation = "https://docs.rs/libcorn" readme = "README.md" @@ -21,10 +24,9 @@ wasm = [ "dep:wasm-bindgen", "dep:serde-wasm-bindgen", "dep:console_error_panic_hook", - "dep:wee_alloc", ] -# Lua version support +# Lua support lua51 = ["dep:mlua", "mlua/lua51"] lua52 = ["dep:mlua", "mlua/lua52"] lua53 = ["dep:mlua", "mlua/lua53"] @@ -32,49 +34,45 @@ lua54 = ["dep:mlua", "mlua/lua54"] luajit = ["dep:mlua", "mlua/luajit"] luajit52 = ["dep:mlua", "mlua/luajit52"] -# Internal features -bench = ["dep:criterion"] - [lib] name = "corn" crate-type = ["cdylib", "rlib"] [dependencies] # Core -pest = "2.7.15" -pest_derive = "2.7.15" -serde = { version = "1.0.218", features = ["derive"] } -indexmap = { version = "2.7.1", features = ["serde"] } - -# Error handling +serde = "1.0.219" +indexmap = { version = "2.9.0", features = ["serde"] } +logos = "0.15.0" +lalrpop-util = { version = "0.22.2", features = ["lexer"] } +itoa = "1.0.15" thiserror = "2.0.12" -# Utilities -cfg-if = "1.0.0" - # WASM support (optional) wasm-bindgen = { version = "0.2.100", optional = true } serde-wasm-bindgen = { version = "0.6.5", optional = true } console_error_panic_hook = { version = "0.1.7", optional = true } -wee_alloc = { version = "0.4.5", optional = true } # Lua support (optional) -mlua = { version = "0.10.3", features = [ +mlua = { version = "0.10.5", features = [ "module", "macros", "serialize", ], optional = true } -# Benchmarking (optional) -criterion = { version = "0.5.1", features = ["html_reports"], optional = true } +[build-dependencies] +lalrpop = "0.22.2" [dev-dependencies] -paste = "1.0.15" wasm-bindgen-test = { version = "0.3.50" } serde_json = "1.0.140" serde_norway = "0.9.42" -serde_bytes = "0.11.16" -toml_edit = { version = "0.22.24", features = ["serde"] } +serde_bytes = "0.11.17" +toml_edit = { version = "0.22.27", features = ["serde"] } +anyhow = "1.0.97" +paste = "1.0.15" + +[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] +criterion = { version = "0.6.0", features = ["html_reports"] } [profile.release] lto = true @@ -84,4 +82,9 @@ strip = true [[bench]] name = "serde" harness = false -required-features = ["bench"] + +[package.metadata.wasm-pack.profile.profiling] +wasm-opt = ['-O', '--enable-bulk-memory'] + +[package.metadata.wasm-pack.profile.release] +wasm-opt = ['-O', '--enable-bulk-memory'] diff --git a/assets/inputs/compact.corn b/assets/inputs/compact.corn index 49185de..0e78286 100644 --- a/assets/inputs/compact.corn +++ b/assets/inputs/compact.corn @@ -8,7 +8,7 @@ seven={foo=[] bar=[]} eight=["foo""bar"] - nine=[truefalse] + nine=[true false] ten=[1 2] eleven=[[][]] twelve=[{}{}] diff --git a/assets/inputs/complex.corn b/assets/inputs/complex.corn index 2d763d5..5710752 100644 --- a/assets/inputs/complex.corn +++ b/assets/inputs/complex.corn @@ -11,7 +11,7 @@ in { // here is another comment name.first = $firstName name.last = $lastName - name.full = "$firstName $lastName" + name.full = "${firstName} ${lastName}" age = $age employment = { diff --git a/assets/inputs/complex_keys.corn b/assets/inputs/complex_keys.corn index cb74912..fca06cc 100644 --- a/assets/inputs/complex_keys.corn +++ b/assets/inputs/complex_keys.corn @@ -2,7 +2,7 @@ with_underscore = 0 with-dash = 1 with_🌽 = 2 - !"£$%^&*()_ = 3 + !£$%^&*()_ = 3 j12345 = 4 foo.bar-baz = "hello" apple-pie.crust = "yum" diff --git a/assets/inputs/integer.corn b/assets/inputs/integer.corn index d02fb21..8673268 100644 --- a/assets/inputs/integer.corn +++ b/assets/inputs/integer.corn @@ -1,5 +1,38 @@ { - foo = 42 - bar = 0xfafafa - baz = 1_000_000 + // Basic decimal integers + positive_decimal = 42 + negative_decimal = -123 + zero = 0 + + // Decimal with underscores + large_decimal = 1_000_000 + negative_large_decimal = -1_234_567 + decimal_with_underscores = 12_34_56 + + // Hexadecimal integers + hex_lowercase = 0xfafafa + hex_uppercase = 0xFF + hex_mixed = 0xAbCdEf + negative_hex = -0xABC + hex_with_underscores = 0xFA_FA_FA + negative_hex_underscores = -0xAB_CD_EF + hex_single_digit = 0xF + hex_zero = 0x0 + + // Octal integers + octal_basic = 0o755 + octal_zero = 0o0 + negative_octal = -0o123 + octal_with_underscores = 0o12_34_56 + negative_octal_underscores = -0o76_54_32 + octal_single_digit = 0o7 + + // Binary integers + binary_basic = 0b1010 + binary_zero = 0b0 + negative_binary = -0b1100 + binary_with_underscores = 0b1010_1100_1111 + negative_binary_underscores = -0b1100_0011 + binary_single_digit = 0b1 + binary_all_ones = 0b1111 } diff --git a/assets/inputs/quoted_keys.corn b/assets/inputs/quoted_keys.corn index da61425..dd3e0f6 100644 --- a/assets/inputs/quoted_keys.corn +++ b/assets/inputs/quoted_keys.corn @@ -4,4 +4,5 @@ 'with spaces' = true 'escaped\'quote' = false 'escaped=equals' = -3 + 'normal"quotes' = "funny" } \ No newline at end of file diff --git a/assets/inputs/string.corn b/assets/inputs/string.corn index 640240a..52a29c4 100644 --- a/assets/inputs/string.corn +++ b/assets/inputs/string.corn @@ -1,6 +1,6 @@ { foo = "bar" bar = "\"\\\n\r\t" - baz = "\u0061" + baz = "\u{0061}" qux = "" } diff --git a/assets/inputs/string_interpolation.corn b/assets/inputs/string_interpolation.corn index 465426e..4b0dd10 100644 --- a/assets/inputs/string_interpolation.corn +++ b/assets/inputs/string_interpolation.corn @@ -2,6 +2,6 @@ let { $greeting = "hello" $subject = "world" } in { - foo = "$greeting, $subject" + foo = "${greeting}, ${subject}" bar = "\$escaped" } diff --git a/assets/inputs/unicode.corn b/assets/inputs/unicode.corn new file mode 100644 index 0000000..2b027c4 --- /dev/null +++ b/assets/inputs/unicode.corn @@ -0,0 +1,6 @@ +{ + plane0 = "\u{053E}" + plane1 = "\u{10514}" + plane2 = "\u{2B77C}" + plane3 = "\u{3106C}" +} \ No newline at end of file diff --git a/assets/inputs/very_compact.corn b/assets/inputs/very_compact.corn index 3932084..4af65a8 100644 --- a/assets/inputs/very_compact.corn +++ b/assets/inputs/very_compact.corn @@ -1 +1 @@ -{one={foo="bar" bar="foo"} two={foo=1 bar=2} three={foo=1.0 bar=2.0} four={foo=true bar=false} five={foo=null bar=null} six={foo={} bar={}} seven={foo=[] bar=[]} eight=["foo""bar"] nine=[truefalse] ten=[1 2] eleven=[[][]] twelve=[{}{}]} +{one={foo="bar" bar="foo"} two={foo=1 bar=2} three={foo=1.0 bar=2.0} four={foo=true bar=false} five={foo=null bar=null} six={foo={} bar={}} seven={foo=[] bar=[]} eight=["foo""bar"] nine=[true false] ten=[1 2] eleven=[[][]] twelve=[{}{}]} diff --git a/assets/outputs/json/complex.json b/assets/outputs/json/complex.json index ae476ef..89d0f90 100644 --- a/assets/outputs/json/complex.json +++ b/assets/outputs/json/complex.json @@ -5,12 +5,12 @@ "full": "John Smith" }, "age": 32, - "placeholder": null, "employment": { "employed": true, "name": "Postman", "sinceYear": 2019 }, + "placeholder": null, "parents": { "father": { "birthday": { diff --git a/assets/outputs/json/complex_keys.json b/assets/outputs/json/complex_keys.json index 3fac302..9b212be 100644 --- a/assets/outputs/json/complex_keys.json +++ b/assets/outputs/json/complex_keys.json @@ -2,7 +2,7 @@ "with_underscore": 0, "with-dash": 1, "with_🌽": 2, - "!\"£$%^&*()_": 3, + "!£$%^&*()_": 3, "j12345": 4, "foo": { "bar-baz": "hello" diff --git a/assets/outputs/json/integer.json b/assets/outputs/json/integer.json index b5ceb63..5855b47 100644 --- a/assets/outputs/json/integer.json +++ b/assets/outputs/json/integer.json @@ -1,5 +1,29 @@ { - "foo": 42, - "bar": 16448250, - "baz": 1000000 + "positive_decimal": 42, + "negative_decimal": -123, + "zero": 0, + "large_decimal": 1000000, + "negative_large_decimal": -1234567, + "decimal_with_underscores": 123456, + "hex_lowercase": 16448250, + "hex_uppercase": 255, + "hex_mixed": 11259375, + "negative_hex": -2748, + "hex_with_underscores": 16448250, + "negative_hex_underscores": -11259375, + "hex_single_digit": 15, + "hex_zero": 0, + "octal_basic": 493, + "octal_zero": 0, + "negative_octal": -83, + "octal_with_underscores": 42798, + "negative_octal_underscores": -256794, + "octal_single_digit": 7, + "binary_basic": 10, + "binary_zero": 0, + "negative_binary": -12, + "binary_with_underscores": 2767, + "negative_binary_underscores": -195, + "binary_single_digit": 1, + "binary_all_ones": 15 } diff --git a/assets/outputs/json/quoted_keys.json b/assets/outputs/json/quoted_keys.json index c1bc3f5..a0ed5e5 100644 --- a/assets/outputs/json/quoted_keys.json +++ b/assets/outputs/json/quoted_keys.json @@ -7,5 +7,6 @@ }, "with spaces": true, "escaped'quote": false, - "escaped=equals": -3 + "escaped=equals": -3, + "normal\"quotes": "funny" } diff --git a/assets/outputs/json/string.json b/assets/outputs/json/string.json index 53e6ede..61c3a14 100644 --- a/assets/outputs/json/string.json +++ b/assets/outputs/json/string.json @@ -1,6 +1,6 @@ { "foo": "bar", - "bar": "\"\\\n", + "bar": "\"\\\n\r\t", "baz": "a", "qux": "" } diff --git a/assets/outputs/json/unicode.json b/assets/outputs/json/unicode.json new file mode 100644 index 0000000..f725d8c --- /dev/null +++ b/assets/outputs/json/unicode.json @@ -0,0 +1,6 @@ +{ + "plane0": "Ծ", + "plane1": "𐔔", + "plane2": "𫝼", + "plane3": "𱁬" +} diff --git a/assets/outputs/toml/complex.toml b/assets/outputs/toml/complex.toml index 3cf09b6..2717888 100644 --- a/assets/outputs/toml/complex.toml +++ b/assets/outputs/toml/complex.toml @@ -7,7 +7,7 @@ favourites = [ "Postman", 4.73753, false, - { hello = "world" }, + { hello = "world", food = { favourite = "egg", hated = "beef" } }, ] empty2 = [] diff --git a/assets/outputs/toml/complex_keys.toml b/assets/outputs/toml/complex_keys.toml index 072b05e..4647d38 100644 --- a/assets/outputs/toml/complex_keys.toml +++ b/assets/outputs/toml/complex_keys.toml @@ -1,7 +1,7 @@ with_underscore = 0 with-dash = 1 "with_🌽" = 2 -'!"£$%^&*()_' = 3 +"!£$%^&*()_" = 3 j12345 = 4 [foo] diff --git a/assets/outputs/toml/integer.toml b/assets/outputs/toml/integer.toml index 23bf8f5..c35536e 100644 --- a/assets/outputs/toml/integer.toml +++ b/assets/outputs/toml/integer.toml @@ -1,4 +1,28 @@ -foo = 42 -bar = 16448250 -baz = 1000000 +positive_decimal = 42 +negative_decimal = -123 +zero = 0 +large_decimal = 1000000 +negative_large_decimal = -1234567 +decimal_with_underscores = 123456 +hex_lowercase = 16448250 +hex_uppercase = 255 +hex_mixed = 11259375 +negative_hex = -2748 +hex_with_underscores = 16448250 +negative_hex_underscores = -11259375 +hex_single_digit = 15 +hex_zero = 0 +octal_basic = 493 +octal_zero = 0 +negative_octal = -83 +octal_with_underscores = 42798 +negative_octal_underscores = -256794 +octal_single_digit = 7 +binary_basic = 10 +binary_zero = 0 +negative_binary = -12 +binary_with_underscores = 2767 +negative_binary_underscores = -195 +binary_single_digit = 1 +binary_all_ones = 15 diff --git a/assets/outputs/toml/quoted_keys.toml b/assets/outputs/toml/quoted_keys.toml index 06bc629..d95a19d 100644 --- a/assets/outputs/toml/quoted_keys.toml +++ b/assets/outputs/toml/quoted_keys.toml @@ -2,6 +2,7 @@ "with spaces" = true "escaped'quote" = false "escaped=equals" = -3 +'normal"quotes' = "funny" ["green.eggs".and] ham = "hello world" diff --git a/assets/outputs/toml/string.toml b/assets/outputs/toml/string.toml index f4cc605..233fbf6 100644 --- a/assets/outputs/toml/string.toml +++ b/assets/outputs/toml/string.toml @@ -1,7 +1,7 @@ foo = "bar" -bar = ''' -"\ -''' +bar = """ +"\\ +\r\t""" baz = "a" qux = "" diff --git a/assets/outputs/toml/unicode.toml b/assets/outputs/toml/unicode.toml new file mode 100644 index 0000000..98cf61f --- /dev/null +++ b/assets/outputs/toml/unicode.toml @@ -0,0 +1,5 @@ +plane0 = "Ծ" +plane1 = "𐔔" +plane2 = "𫝼" +plane3 = "𱁬" + diff --git a/assets/outputs/yaml/complex.yml b/assets/outputs/yaml/complex.yml index 8fbbc46..1c34547 100644 --- a/assets/outputs/yaml/complex.yml +++ b/assets/outputs/yaml/complex.yml @@ -3,11 +3,11 @@ name: last: Smith full: John Smith age: 32 -placeholder: null employment: employed: true name: Postman sinceYear: 2019 +placeholder: null parents: father: birthday: diff --git a/assets/outputs/yaml/complex_keys.yml b/assets/outputs/yaml/complex_keys.yml index cb4e419..24d3214 100644 --- a/assets/outputs/yaml/complex_keys.yml +++ b/assets/outputs/yaml/complex_keys.yml @@ -1,7 +1,7 @@ with_underscore: 0 with-dash: 1 with_🌽: 2 -'!"£$%^&*()_': 3 +'!£$%^&*()_': 3 j12345: 4 foo: bar-baz: hello diff --git a/assets/outputs/yaml/integer.yml b/assets/outputs/yaml/integer.yml index e76f2c1..f2d1400 100644 --- a/assets/outputs/yaml/integer.yml +++ b/assets/outputs/yaml/integer.yml @@ -1,4 +1,28 @@ -foo: 42 -bar: 16448250 -baz: 1000000 +positive_decimal: 42 +negative_decimal: -123 +zero: 0 +large_decimal: 1000000 +negative_large_decimal: -1234567 +decimal_with_underscores: 123456 +hex_lowercase: 16448250 +hex_uppercase: 255 +hex_mixed: 11259375 +negative_hex: -2748 +hex_with_underscores: 16448250 +negative_hex_underscores: -11259375 +hex_single_digit: 15 +hex_zero: 0 +octal_basic: 493 +octal_zero: 0 +negative_octal: -83 +octal_with_underscores: 42798 +negative_octal_underscores: -256794 +octal_single_digit: 7 +binary_basic: 10 +binary_zero: 0 +negative_binary: -12 +binary_with_underscores: 2767 +negative_binary_underscores: -195 +binary_single_digit: 1 +binary_all_ones: 15 diff --git a/assets/outputs/yaml/quoted_keys.yml b/assets/outputs/yaml/quoted_keys.yml index da5aced..8172c2d 100644 --- a/assets/outputs/yaml/quoted_keys.yml +++ b/assets/outputs/yaml/quoted_keys.yml @@ -5,4 +5,5 @@ green.eggs: with spaces: true escaped'quote: false escaped=equals: -3 +normal"quotes: funny diff --git a/assets/outputs/yaml/string.yml b/assets/outputs/yaml/string.yml index f1649d4..6a78af1 100644 --- a/assets/outputs/yaml/string.yml +++ b/assets/outputs/yaml/string.yml @@ -1,6 +1,5 @@ foo: bar -bar: | - "\ +bar: "\"\\\n\r\t" baz: a qux: '' diff --git a/assets/outputs/yaml/unicode.yml b/assets/outputs/yaml/unicode.yml new file mode 100644 index 0000000..d53844d --- /dev/null +++ b/assets/outputs/yaml/unicode.yml @@ -0,0 +1,5 @@ +plane0: Ծ +plane1: 𐔔 +plane2: 𫝼 +plane3: 𱁬 + diff --git a/benches/serde.rs b/benches/serde.rs index 8daf672..35807c3 100644 --- a/benches/serde.rs +++ b/benches/serde.rs @@ -1,6 +1,7 @@ -use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use criterion::{criterion_group, criterion_main, Criterion}; use paste::paste; use serde::Deserialize; +use std::hint::black_box; macro_rules! generate_benches { ($(($test_name:ident, $test_type:ty)),+) => { @@ -233,7 +234,7 @@ struct ComplexParentsFatherBirthday { #[derive(Deserialize, Debug, PartialEq)] struct ComplexKeys { - #[serde(rename = "!\"£$%^&*()_")] + #[serde(rename = "!£$%^&*()_")] symbols: i64, #[serde(rename = "apple-pie")] apple_pie: ComplexKeysApplePie, diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..ab1424e --- /dev/null +++ b/build.rs @@ -0,0 +1,5 @@ +fn main() { + println!("cargo::rerun-if-changed=src/corn.lalrpop"); + + lalrpop::process_src().expect("Failed to parse grammar"); +} diff --git a/cli/src/error.rs b/cli/src/error.rs index 58f213b..25d1028 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -1,5 +1,5 @@ use colored::Colorize; -use corn::error::Error as CornError; +use corn::Error as CornError; use std::fmt::{Display, Formatter}; use std::io; @@ -20,12 +20,11 @@ pub trait ExitCode { impl ExitCode for CornError { fn get_exit_code(&self) -> i32 { match self { + CornError::ParseError(_) => 1, CornError::Io(_) => 3, - CornError::ParserError(_) => 1, CornError::InputResolveError(_) => 2, - CornError::InvalidPathError(_) => 6, - CornError::InvalidSpreadError(_) => 7, - CornError::InvalidInterpolationError(_) => 8, + CornError::InvalidSpreadError => 7, + CornError::InvalidInterpolationError => 8, CornError::DeserializationError(_) => 5, } } diff --git a/cli/src/main.rs b/cli/src/main.rs index 71e40cb..53cdb5f 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,4 +1,4 @@ -use corn::{parse, Value}; +use corn::{parse, BorrowedValue}; use std::io::Read; use std::process::exit; use std::{fs, io}; @@ -48,7 +48,7 @@ fn main() { let output_type = get_output_type(args.output_type); match parse(&unparsed_file) { - Ok(config) => match serialize(&config, output_type) { + Ok(config) => match serialize(config, output_type) { Ok(serialized) => println!("{serialized}"), Err(err) => handle_err(&err), }, @@ -81,7 +81,7 @@ fn get_output_type(arg: Option) -> OutputType { OutputType::Json } -fn serialize(config: &Value, output_type: OutputType) -> Result { +fn serialize(config: BorrowedValue, output_type: OutputType) -> Result { match output_type { OutputType::Json => serde_json::to_string_pretty(&config).map_err(Error::from), OutputType::Yaml => serde_norway::to_string(&config).map_err(Error::from), diff --git a/scripts/generate-tests.sh b/scripts/generate-tests.sh index 53cfe96..a63236c 100755 --- a/scripts/generate-tests.sh +++ b/scripts/generate-tests.sh @@ -10,7 +10,7 @@ for file in assets/inputs/*; do echo "$basename" - cargo run --bin corn -- "$file" -t json > assets/outputs/json/"$basename".json - cargo run --bin corn -- "$file" -t yaml > assets/outputs/yaml/"$basename".yml - cargo run --bin corn -- "$file" -t toml > assets/outputs/toml/"$basename".toml + cargo run --package corn-cli -- "$file" -t json > assets/outputs/json/"$basename".json + cargo run --package corn-cli -- "$file" -t yaml > assets/outputs/yaml/"$basename".yml + cargo run --package corn-cli -- "$file" -t toml > assets/outputs/toml/"$basename".toml done \ No newline at end of file diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..9f51a52 --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,68 @@ +use std::collections::HashMap; + +use crate::{lexer::StringPart, Integer}; + +/// Store for input declarations +pub type Inputs<'input> = HashMap<&'input str, Entry<'input>>; + +/// Top level ast object +#[derive(Debug, Clone)] +pub struct Root<'input> { + /// Raw inputs + pub inputs: Inputs<'input>, + /// Top level object, values aren't interpolated + pub object: Object<'input>, +} + +/// Represents an object in the AST +#[derive(Debug, Clone)] +pub struct Object<'input> { + // /// The pairs or spread operations in the object + pub pairs: Vec>, +} + +/// Either a key-value pair or a spread operation +#[derive(Debug, Clone)] +pub enum PairOrSpread<'input> { + /// A key-value pair in an object + Pair(ChainedKey<'input>, Entry<'input>), + /// A spread operation in an object + Spread(&'input str), +} + +/// Represents a chained key like "foo.bar.baz" +#[derive(Debug, Clone)] +pub struct ChainedKey<'input> { + /// The segments of the key path + pub segments: Vec<&'input str>, +} + +/// An entry can be of various types as defined in the spec +#[derive(Debug, Clone)] +pub enum Entry<'input> { + /// String literal + String(Vec>), + /// Integer value + Integer(Integer), + /// Float value + Float(f64), + /// Boolean value + Boolean(bool), + /// Nested object + Object(Object<'input>), + /// Array of entries + Array(Vec>), + /// Null value + Null, + /// Reference to an input variable + Input(&'input str), +} + +/// Either a key or an array spread operation +#[derive(Debug, Clone)] +pub enum EntryOrSpread<'input> { + /// An entry can be of various types as defined in the spec + Entry(Entry<'input>), + /// Array spread operation + Spread(&'input str), +} diff --git a/src/corn.lalrpop b/src/corn.lalrpop new file mode 100644 index 0000000..d441637 --- /dev/null +++ b/src/corn.lalrpop @@ -0,0 +1,87 @@ +use crate::ast::{Root, Entry, Object, Inputs, PairOrSpread, ChainedKey, EntryOrSpread}; +use crate::lexer::{LexicalError, Token, StringPart}; +use crate::Integer; + +grammar<'input>(input: &'input str); + +extern { + type Location = usize; + type Error = LexicalError; + + enum Token<'input> { + "let" => Token::Let, + "in" => Token::In, + "null" => Token::Null, + "=" => Token::Equals, + "{" => Token::OpenBrace, + "}" => Token::CloseBrace, + "[" => Token::OpenBracket, + "]" => Token::CloseBracket, + "." => Token::Chain, + ".." => Token::Spread, + "literal" => Token::Literal(>>), + "int" => Token::Integer(), + "float" => Token::Float(), + "bool" => Token::Boolean(), + "input" => Token::InputName(<&'input str>), + "key" => Token::Key(<&'input str>), + } +} + +pub Root: Root<'input> = { + => { + Root { + inputs: inputs.unwrap_or_default(), + object, + } + } +} + +LetBlock: Inputs<'input> = { + "let" "{" )*> "}" "in" => inputs.into_iter().collect() +}; + +InputDeclaration: (&'input str, Entry<'input>) = { + "=" => (name, entry), +}; + +Entry: Entry<'input> = { + "literal" => Entry::String(<>), + "int" => Entry::Integer(<>), + "float" => Entry::Float(<>), + "bool" => Entry::Boolean(<>), + Object => Entry::Object(<>), + Array => Entry::Array(<>), + "null" => Entry::Null, + "input" => Entry::Input(<>), +} + +Object: Object<'input> = { + "{" )*> "}" => { + Object { + pairs, + } + } +}; + +Pair: PairOrSpread<'input> = { + "=" => PairOrSpread::Pair(k, v), + ".." => PairOrSpread::Spread(name), +}; + +KeyPath: ChainedKey<'input> = { + )*> => { + ChainedKey { + segments: [vec![first], rest].concat() + } + } +}; + +Array: Vec> = { + "[" "]" => entries, +}; + +ArrayEntry: EntryOrSpread<'input> = { + Entry => EntryOrSpread::Entry(<>), + ".." => EntryOrSpread::Spread(name), +}; \ No newline at end of file diff --git a/src/de.rs b/src/de.rs index 8cade85..3550370 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1,261 +1,425 @@ -use std::collections::VecDeque; +use std::borrow::Cow; -use serde::de::{self, DeserializeSeed, EnumAccess, IntoDeserializer, VariantAccess, Visitor}; +use indexmap::IndexMap; +use serde::de::{self, IntoDeserializer}; -use crate::error::{Error, Result}; -use crate::parse; -use crate::Value; +use crate::{ + ast::{Entry, EntryOrSpread, Inputs, PairOrSpread, Root}, + lexer::{Lexer, StringPart}, + parser::RootParser, + BorrowedObject, BorrowedValue, Error, Result, +}; -#[derive(Debug)] +/// A structure that deserializes Corn configuration values. +#[derive(Clone)] pub struct Deserializer<'de> { - value: Option>, + value: BorrowedValue<'de>, +} + +/// Parse a Corn configuration string into a borrowed value. +pub fn parse(input: &str) -> Result { + Deserializer::parse(input) } impl<'de> Deserializer<'de> { - pub fn from_str(input: &'de str) -> Result { - let parsed = parse(input)?; + /// Parse a Corn configuration string into a borrowed value. + pub fn parse(input: &str) -> Result { + let mut lexer = Lexer::new(input); + let parser = RootParser::new(); + let Root { inputs, object } = parser + .parse(input, &mut lexer) + .map_err(|err| Error::ParseError(err.to_string()))?; - Ok(Self::from_value(parsed)) + Self::resolve_entry(&Entry::Object(object), &inputs) } - fn from_value(value: Value<'de>) -> Self { - Self { value: Some(value) } + /// Create a deserializer from a Corn configuration string. + #[allow(clippy::should_implement_trait)] + pub fn from_str(input: &'de str) -> Result { + Self::parse(input).map(|value| Self { value }) } -} - -/// Attempts to deserialize the config from a string slice. -/// -/// # Errors -/// -/// Will return a `DeserializationError` if the config is invalid. -pub fn from_str(s: &str) -> Result -where - T: de::DeserializeOwned, -{ - let mut deserializer = Deserializer::from_str(s)?; - T::deserialize(&mut deserializer) -} -/// Attempts to deserialize the config from a byte slice. -/// -/// # Errors -/// -/// Will return a `DeserializationError` if the config is invalid. -pub fn from_slice(bytes: &[u8]) -> Result -where - T: de::DeserializeOwned, -{ - match std::str::from_utf8(bytes) { - Ok(s) => from_str(s), - Err(e) => Err(Error::DeserializationError(e.to_string())), + fn with_value(value: BorrowedValue<'de>) -> Self { + Self { value } } -} -macro_rules! get_value { - ($self:ident) => { - match $self.value.take() { - Some(val) => Ok(val), - None => Err(Error::DeserializationError(String::from( - "Deserializer value unexpectedly `None`", - ))), - }? - }; -} + fn resolve_entry<'input>( + entry: &Entry<'input>, + inputs: &Inputs<'input>, + ) -> Result> { + match entry { + Entry::String(parts) => { + if parts.is_empty() { + return Ok(BorrowedValue::String(Cow::Borrowed(""))); + } -macro_rules! err_expected { - ($expected:literal, $got:expr) => { - Err(Error::DeserializationError(format!( - "Expected {}, found '{:?}'", - $expected, $got - ))) - }; -} + let mut base = String::new(); -macro_rules! match_value { - ($self:ident, $name:literal, $($pat:pat => $expr:expr)+) => {{ - let value = get_value!($self); - match value { - $($pat => $expr, )+ - _ => err_expected!($name, value) - } - }}; -} + for part in parts { + match part { + StringPart::Literal(lit) => base.push_str(lit), + StringPart::Input(input) => { + let input = Self::resolve_input(input, inputs)?; -impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { - type Error = Error; + match input { + BorrowedValue::String(string) => base.push_str(&string), + _ => return Err(Error::InvalidInterpolationError), + } + } + } + } - fn deserialize_any(self, visitor: V) -> std::result::Result - where - V: Visitor<'de>, - { - let value = get_value!(self); - match value { - Value::Object(_) => { - let map = Map::new(value); - visitor.visit_map(map) + Ok(BorrowedValue::String(Cow::Owned( + Self::process_multiline_string(&base), + ))) } - Value::Array(_) => { - let seq = Seq::new(value); - visitor.visit_seq(seq) + Entry::Integer(integer) => Ok(BorrowedValue::Integer(*integer)), + Entry::Float(float) => Ok(BorrowedValue::Float(*float)), + Entry::Boolean(boolean) => Ok(BorrowedValue::Boolean(*boolean)), + Entry::Object(obj) => { + let mut resolved_object = IndexMap::new(); + + for pair_or_spread in &obj.pairs { + match pair_or_spread { + PairOrSpread::Pair(key, value) => { + fn unescape_key(key: &str) -> Cow { + if key.contains("\\'") { + Cow::Owned(key.replace("\\'", "'")) + } else { + Cow::Borrowed(key) + } + } + + let processed_segments: Vec> = key + .segments + .iter() + .map(|segment| unescape_key(segment)) + .collect(); + + resolved_object.reserve_exact(processed_segments.len()); + + Self::insert_at_path( + &mut resolved_object, + &processed_segments, + Self::resolve_entry(value, inputs)?, + )?; + } + PairOrSpread::Spread(name) => { + if let Some(spread_entry) = inputs.get(name) { + match Self::resolve_entry(spread_entry, inputs)? { + BorrowedValue::Object(spread_obj) => { + resolved_object.extend(spread_obj); + } + _ => return Err(Error::InvalidSpreadError), + } + } else { + return Err(Error::InputResolveError(name.to_string())); + } + } + } + } + + resolved_object.shrink_to_fit(); + + Ok(BorrowedValue::Object(resolved_object)) + } + Entry::Array(items) => { + let mut resolved_array = Vec::with_capacity(items.len()); // We need at least the same amount of items + + for entry in items { + match entry { + EntryOrSpread::Entry(entry) => { + resolved_array.push(Self::resolve_entry(entry, inputs)?) + } + EntryOrSpread::Spread(spread) => match Self::resolve_input(spread, inputs)? + { + BorrowedValue::Array(array) => { + resolved_array.extend(array); + } + _ => return Err(Error::InvalidSpreadError), + }, + } + } + + Ok(BorrowedValue::Array(resolved_array)) } - Value::String(val) => visitor.visit_str(&val), - Value::Integer(val) => visitor.visit_i64(val), - Value::Float(val) => visitor.visit_f64(val), - Value::Boolean(val) => visitor.visit_bool(val), - Value::Null(_) => visitor.visit_unit(), + Entry::Null => Ok(BorrowedValue::Null), + Entry::Input(input) => Self::resolve_input(input, inputs), } } - fn deserialize_bool(self, visitor: V) -> std::result::Result - where - V: Visitor<'de>, - { - match_value!(self, "boolean", Value::Boolean(val) => visitor.visit_bool(val)) - } + fn insert_at_path<'input>( + obj: &mut BorrowedObject<'input>, + path: &[Cow<'input, str>], + value: BorrowedValue<'input>, + ) -> Result<(), Error> { + if path.is_empty() { + return Err(Error::DeserializationError("Empty path".to_string())); + } - fn deserialize_i8(self, visitor: V) -> std::result::Result - where - V: Visitor<'de>, - { - match_value!(self, "integer (i8)", Value::Integer(val) => visitor.visit_i8(val as i8)) - } + if path.len() == 1 { + obj.insert(path[0].clone(), value); + return Ok(()); + } - fn deserialize_i16(self, visitor: V) -> std::result::Result - where - V: Visitor<'de>, - { - match_value!(self, "integer (i16)", Value::Integer(val) => visitor.visit_i16(val as i16)) - } + let (first, rest) = path.split_first().expect("Internal splitting error"); + let entry = obj + .entry(first.clone()) + .or_insert_with(|| BorrowedValue::Object(IndexMap::new())); - fn deserialize_i32(self, visitor: V) -> std::result::Result - where - V: Visitor<'de>, - { - match_value!(self, "integer (i32)", Value::Integer(val) => visitor.visit_i32(val as i32)) - } + match entry { + BorrowedValue::Object(nested_obj) => { + Self::insert_at_path(nested_obj, rest, value)?; + } + _ => { + return Err(Error::DeserializationError(format!( + "Cannot index into non-object at key: {}", + first + ))); + } + } - fn deserialize_i64(self, visitor: V) -> std::result::Result - where - V: Visitor<'de>, - { - match_value!(self, "integer (i64)", Value::Integer(val) => visitor.visit_i64(val)) + Ok(()) } - fn deserialize_u8(self, visitor: V) -> std::result::Result - where - V: Visitor<'de>, - { - match_value!(self, "integer (u8)", Value::Integer(val) => visitor.visit_u8(val as u8)) + fn process_multiline_string(input: &str) -> String { + if !input.starts_with('\n') { + return input.to_string(); + } + + let lines: Vec<&str> = input.lines().collect(); + if lines.len() < 3 { + // Need at least: empty, content, empty/content + return input.to_string(); + } + + // Skip first empty line and handle last line (may be empty or just whitespace) + let mut content_lines: Vec<&str> = lines.iter().skip(1).copied().collect(); + + // Remove trailing lines that are empty or only whitespace + while let Some(&last) = content_lines.last() { + if last.trim().is_empty() { + content_lines.pop(); + } else { + break; + } + } + + if content_lines.is_empty() { + return String::new(); + } + + // Find minimum indentation of non-empty lines + let min_indent = content_lines + .iter() + .filter(|line| !line.trim().is_empty()) + .map(|line| line.len() - line.trim_start().len()) + .min() + .unwrap_or(0); + + // Remove minimum indentation and join with newlines + let result_lines: Vec<&str> = content_lines + .iter() + .map(|line| { + if line.trim().is_empty() { + "" + } else if line.len() >= min_indent { + &line[min_indent..] + } else { + line + } + }) + .collect(); + + let mut result = result_lines.join("\n"); + + if !result.is_empty() { + result.push('\n'); + } + + result } - fn deserialize_u16(self, visitor: V) -> std::result::Result - where - V: Visitor<'de>, - { - match_value!(self, "integer (u16)", Value::Integer(val) => visitor.visit_u16(val as u16)) + fn resolve_input<'input>( + input: &str, + inputs: &Inputs<'input>, + ) -> Result> { + if let Some(env) = input.strip_prefix("env_") { + if let Ok(env) = std::env::var(env) { + return Ok(BorrowedValue::String(Cow::Owned(env))); + } + } + + if let Some(entry) = inputs.get(input) { + return Self::resolve_entry(entry, inputs); + } + + Err(Error::InputResolveError(input.to_string())) } +} + +/// Deserialize a Corn configuration string into a Rust data structure. +pub fn from_str<'a, T>(s: &'a str) -> Result +where + T: de::Deserialize<'a>, +{ + let mut deserializer = Deserializer::from_str(s)?; + + T::deserialize(&mut deserializer) +} + +macro_rules! deserialize_number { + ($method:ident) => { + fn $method(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + match self.value { + BorrowedValue::Integer(integer) => integer.deserialize_any(visitor), + ref value => Err(value.invalid_type("Integer")), + } + } + }; +} - fn deserialize_u32(self, visitor: V) -> std::result::Result +impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "integer (u32)", Value::Integer(val) => visitor.visit_u32(val as u32)) + match self.value { + BorrowedValue::String(ref string) => match string { + Cow::Borrowed(s) => visitor.visit_borrowed_str(s), + Cow::Owned(s) => visitor.visit_str(s), + }, + BorrowedValue::Integer(integer) => integer.deserialize_any(visitor), + BorrowedValue::Float(float) => visitor.visit_f64(float), + BorrowedValue::Boolean(boolean) => visitor.visit_bool(boolean), + BorrowedValue::Null => visitor.visit_unit(), + BorrowedValue::Array(ref mut items) => { + let mut seq = Vec::new(); + std::mem::swap(items, &mut seq); + + visitor.visit_seq(SeqAccess::new(seq)) + } + BorrowedValue::Object(ref mut object) => { + let mut map = IndexMap::new(); + std::mem::swap(object, &mut map); + + visitor.visit_map(MapAccess::new(map)) + } + } } - fn deserialize_u64(self, visitor: V) -> std::result::Result + fn deserialize_bool(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "integer (u64)", Value::Integer(val) => visitor.visit_u64(val as u64)) + match self.value { + BorrowedValue::Boolean(boolean) => visitor.visit_bool(boolean), + ref value => Err(value.invalid_type("Boolean")), + } } - fn deserialize_f32(self, visitor: V) -> std::result::Result + deserialize_number!(deserialize_i8); + deserialize_number!(deserialize_i16); + deserialize_number!(deserialize_i32); + deserialize_number!(deserialize_i64); + deserialize_number!(deserialize_u8); + deserialize_number!(deserialize_u16); + deserialize_number!(deserialize_u32); + deserialize_number!(deserialize_u64); + + fn deserialize_f32(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "float (f32)", Value::Float(val) => visitor.visit_f32(val as f32)) + self.deserialize_f64(visitor) } - fn deserialize_f64(self, visitor: V) -> std::result::Result + fn deserialize_f64(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "float (f64)", Value::Float(val) => visitor.visit_f64(val)) + match self.value { + BorrowedValue::Float(float) => visitor.visit_f64(float), + ref value => Err(value.invalid_type("Float")), + } } - fn deserialize_char(self, visitor: V) -> std::result::Result + fn deserialize_char(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - let value = get_value!(self); - let char = match value { - Value::String(value) => value.chars().next(), - _ => return err_expected!("char", value), - }; - - match char { - Some(char) => visitor.visit_char(char), - None => err_expected!("char", "empty string"), - } + self.deserialize_str(visitor) } - fn deserialize_str(self, visitor: V) -> std::result::Result + fn deserialize_str(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "string", - Value::String(val) => visitor.visit_str(&val) - ) + match self.value { + BorrowedValue::String(ref string) => match string { + Cow::Borrowed(s) => visitor.visit_borrowed_str(s), + Cow::Owned(s) => visitor.visit_str(s), + }, + ref value => Err(value.invalid_type("String")), + } } - fn deserialize_string(self, visitor: V) -> std::result::Result + fn deserialize_string(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { self.deserialize_str(visitor) } - fn deserialize_bytes(self, visitor: V) -> std::result::Result + fn deserialize_bytes(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "bytes array", - Value::String(val) => visitor.visit_bytes(val.as_bytes()) - ) + match self.value { + BorrowedValue::String(ref string) => visitor.visit_bytes(string.as_bytes()), + ref value => Err(value.invalid_type("Byte String")), + } } - fn deserialize_byte_buf(self, visitor: V) -> std::result::Result + fn deserialize_byte_buf(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { self.deserialize_bytes(visitor) } - fn deserialize_option(self, visitor: V) -> std::result::Result + fn deserialize_option(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - let value = get_value!(self); - match value { - Value::Null(_) => visitor.visit_none(), - _ => visitor.visit_some(&mut Deserializer::from_value(value)), + match self.value { + BorrowedValue::Null => visitor.visit_none(), + _ => visitor.visit_some(self), } } - fn deserialize_unit(self, visitor: V) -> std::result::Result + fn deserialize_unit(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - visitor.visit_unit() + match self.value { + BorrowedValue::Null => visitor.visit_unit(), + ref value => Err(value.invalid_type("Null")), + } } fn deserialize_unit_struct( self, _name: &'static str, visitor: V, - ) -> std::result::Result + ) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { self.deserialize_unit(visitor) } @@ -264,31 +428,31 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { self, _name: &'static str, visitor: V, - ) -> std::result::Result + ) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { visitor.visit_newtype_struct(self) } - fn deserialize_seq(self, visitor: V) -> std::result::Result + fn deserialize_seq(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - let value = get_value!(self); - match value { - Value::Array(_) => visitor.visit_seq(Seq::new(value)), - _ => err_expected!("array", value), + match self.value { + BorrowedValue::Array(ref mut items) => { + let mut seq = Vec::new(); + std::mem::swap(items, &mut seq); + + visitor.visit_seq(SeqAccess::new(seq)) + } + ref value => Err(value.invalid_type("Array")), } } - fn deserialize_tuple( - self, - _len: usize, - visitor: V, - ) -> std::result::Result + fn deserialize_tuple(self, _len: usize, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { self.deserialize_seq(visitor) } @@ -298,21 +462,25 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { _name: &'static str, _len: usize, visitor: V, - ) -> std::result::Result + ) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { self.deserialize_seq(visitor) } - fn deserialize_map(self, visitor: V) -> std::result::Result + fn deserialize_map(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - let value = get_value!(self); - match value { - Value::Object(_) => visitor.visit_map(Map::new(value)), - _ => err_expected!("object", value), + match self.value { + BorrowedValue::Object(ref mut object) => { + let mut map = IndexMap::new(); + std::mem::swap(object, &mut map); + + visitor.visit_map(MapAccess::new(map)) + } + ref value => Err(value.invalid_type("Object")), } } @@ -321,9 +489,9 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { _name: &'static str, _fields: &'static [&'static str], visitor: V, - ) -> std::result::Result + ) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { self.deserialize_map(visitor) } @@ -333,197 +501,186 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { _name: &'static str, _variants: &'static [&'static str], visitor: V, - ) -> std::result::Result + ) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - let value = get_value!(self); - match value { - Value::Object(_) => visitor.visit_enum(Enum::new(value)), - Value::String(val) => visitor.visit_enum(val.into_deserializer()), - _ => err_expected!("object or string (enum variant)", value), + match self.value { + BorrowedValue::String(ref string) => { + visitor.visit_enum(string.as_ref().into_deserializer()) + } + BorrowedValue::Object(ref mut object) => { + let mut map = IndexMap::new(); + std::mem::swap(object, &mut map); + + visitor.visit_enum(EnumAccess::new(map)) + } + ref value => Err(value.invalid_type("String or Object")), } } - fn deserialize_identifier(self, visitor: V) -> std::result::Result + fn deserialize_identifier(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { self.deserialize_str(visitor) } - fn deserialize_ignored_any(self, visitor: V) -> std::result::Result + fn deserialize_ignored_any(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { self.deserialize_any(visitor) } } -struct Map<'de> { - values: VecDeque>, +struct SeqAccess<'de> { + items: std::vec::IntoIter>, } -impl<'de> Map<'de> { - fn new(value: Value<'de>) -> Self { - match value { - Value::Object(values) => Self { - values: values - .into_iter() - .flat_map(|(key, value)| vec![Value::String(key), value]) - .collect(), - }, - _ => unreachable!(), +impl<'de> SeqAccess<'de> { + pub fn new(items: Vec>) -> Self { + Self { + items: items.into_iter(), } } } -impl<'de> de::MapAccess<'de> for Map<'de> { +impl<'de> de::SeqAccess<'de> for SeqAccess<'de> { type Error = Error; - fn next_key_seed(&mut self, seed: K) -> std::result::Result, Self::Error> + fn next_element_seed(&mut self, seed: T) -> Result, Self::Error> where - K: DeserializeSeed<'de>, + T: de::DeserializeSeed<'de>, { - if let Some(value) = self.values.pop_front() { - seed.deserialize(&mut Deserializer::from_value(value)) - .map(Some) - } else { - Ok(None) - } - } - - fn next_value_seed(&mut self, seed: V) -> std::result::Result - where - V: DeserializeSeed<'de>, - { - match self.values.pop_front() { - Some(value) => seed.deserialize(&mut Deserializer::from_value(value)), - None => Err(Error::DeserializationError( - "Expected value to exist".to_string(), - )), + match self.items.next() { + Some(item) => { + let mut deserializer = Deserializer::with_value(item); + seed.deserialize(&mut deserializer).map(Some) + } + None => Ok(None), } } - - fn size_hint(&self) -> Option { - Some(self.values.len() / 2) - } } -struct Seq<'de> { - values: VecDeque>, +struct MapAccess<'de> { + items: indexmap::map::IntoIter, BorrowedValue<'de>>, + current_value: Option>, } -impl<'de> Seq<'de> { - fn new(value: Value<'de>) -> Self { - match value { - Value::Array(values) => Self { - values: VecDeque::from(values), - }, - _ => unreachable!(), +impl<'de> MapAccess<'de> { + fn new(items: IndexMap, BorrowedValue<'de>>) -> Self { + Self { + items: items.into_iter(), + current_value: None, } } } -impl<'de> de::SeqAccess<'de> for Seq<'de> { +impl<'de> de::MapAccess<'de> for MapAccess<'de> { type Error = Error; - fn next_element_seed( - &mut self, - seed: T, - ) -> std::result::Result, Self::Error> + fn next_key_seed(&mut self, seed: K) -> Result, Self::Error> where - T: DeserializeSeed<'de>, + K: de::DeserializeSeed<'de>, { - if let Some(value) = self.values.pop_front() { - seed.deserialize(&mut Deserializer::from_value(value)) - .map(Some) - } else { - Ok(None) + match self.items.next() { + Some((key, value)) => { + self.current_value = Some(value); + let mut key_deserializer = Deserializer::with_value(BorrowedValue::String(key)); + seed.deserialize(&mut key_deserializer).map(Some) + } + None => Ok(None), } } - fn size_hint(&self) -> Option { - Some(self.values.len()) + fn next_value_seed(&mut self, seed: V) -> Result + where + V: de::DeserializeSeed<'de>, + { + match self.current_value.take() { + Some(value) => { + let mut deserializer = Deserializer::with_value(value); + seed.deserialize(&mut deserializer) + } + None => Err(Error::DeserializationError( + "No value available".to_string(), + )), + } } } - -struct Enum<'de> { - value: Value<'de>, +struct EnumAccess<'de> { + object: BorrowedObject<'de>, } -impl<'de> Enum<'de> { - fn new(value: Value<'de>) -> Self { - Self { value } +impl<'de> EnumAccess<'de> { + fn new(object: BorrowedObject<'de>) -> Self { + Self { object } } } -impl<'de> EnumAccess<'de> for Enum<'de> { +impl<'de> de::EnumAccess<'de> for EnumAccess<'de> { type Error = Error; - type Variant = Variant<'de>; + type Variant = VariantAccess<'de>; fn variant_seed(self, seed: V) -> std::result::Result<(V::Value, Self::Variant), Self::Error> where - V: DeserializeSeed<'de>, + V: de::DeserializeSeed<'de>, { - match self.value { - Value::String(_) => { - let value = seed.deserialize(&mut Deserializer::from_value(self.value))?; - Ok((value, Variant::new(None))) - } - Value::Object(obj) => { - let first_pair = obj.into_iter().next(); - if let Some(first_pair) = first_pair { - let value = Value::String(first_pair.0); - let tag = seed.deserialize(&mut Deserializer::from_value(value))?; - Ok((tag, Variant::new(Some(first_pair.1)))) - } else { - Err(Error::DeserializationError( - "Cannot deserialize empty object into enum".to_string(), - )) - } - } - _ => unreachable!(), + if self.object.len() != 1 { + return Err(Error::DeserializationError(format!( + "Expected enum object with exactly one key, found {}", + self.object.len() + ))); } + + let (key, value) = self + .object + .into_iter() + .next() + .expect("Internal variant error"); + + let mut key_deserializer = Deserializer::with_value(BorrowedValue::String(key)); + let variant = seed.deserialize(&mut key_deserializer)?; + + Ok((variant, VariantAccess::new(value))) } } -struct Variant<'de> { - value: Option>, +struct VariantAccess<'de> { + value: BorrowedValue<'de>, } -impl<'de> Variant<'de> { - fn new(value: Option>) -> Self { +impl<'de> VariantAccess<'de> { + fn new(value: BorrowedValue<'de>) -> Self { Self { value } } } -impl<'de> VariantAccess<'de> for Variant<'de> { +impl<'de> de::VariantAccess<'de> for VariantAccess<'de> { type Error = Error; fn unit_variant(self) -> std::result::Result<(), Self::Error> { - Ok(()) + match self.value { + BorrowedValue::Null => Ok(()), + ref value => Err(value.invalid_type("unit variant (null)")), + } } fn newtype_variant_seed(self, seed: T) -> std::result::Result where - T: DeserializeSeed<'de>, + T: de::DeserializeSeed<'de>, { - match self.value { - Some(value) => seed.deserialize(&mut Deserializer::from_value(value)), - None => Err(Error::DeserializationError( - "Expected value to exist".to_string(), - )), - } + seed.deserialize(&mut Deserializer::with_value(self.value)) } fn tuple_variant(self, _len: usize, visitor: V) -> std::result::Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { match self.value { - Some(value) if matches!(value, Value::Array(_)) => visitor.visit_seq(Seq::new(value)), - _ => unreachable!(), + BorrowedValue::Array(items) => visitor.visit_seq(SeqAccess::new(items)), + ref value => Err(value.invalid_type("tuple variant (array)")), } } @@ -533,11 +690,11 @@ impl<'de> VariantAccess<'de> for Variant<'de> { visitor: V, ) -> std::result::Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { match self.value { - Some(value) if matches!(value, Value::Object(_)) => visitor.visit_map(Map::new(value)), - _ => unreachable!(), + BorrowedValue::Object(object) => visitor.visit_map(MapAccess::new(object)), + ref value => Err(value.invalid_type("struct variant (object)")), } } } diff --git a/src/error.rs b/src/error.rs index cf0e631..c34b70b 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,32 +1,27 @@ -use std::fmt::{Debug, Display}; +use std::fmt::Display; use thiserror::Error; -use crate::Rule; - -pub type Result = std::result::Result; +pub type Result = std::result::Result; #[derive(Error, Debug)] pub enum Error { #[error(transparent)] Io(#[from] std::io::Error), - #[error(transparent)] - ParserError(#[from] Box>), - #[error("failed to resolve referenced input `{0}`")] InputResolveError(String), - #[error("attempted to use dot-notation on non-object value at `{0}`")] - InvalidPathError(String), + #[error("attempted to spread a type that differs from its containing type")] + InvalidSpreadError, - #[error("attempted to spread a type that differs from its containing type at `{0}`")] - InvalidSpreadError(String), - - #[error("attempted to interpolate a non-string type into a string at `{0}`")] - InvalidInterpolationError(String), + #[error("attempted to interpolate a non-string type into a string")] + InvalidInterpolationError, #[error("failed to deserialize input: {0}")] DeserializationError(String), + + #[error("failed to parse input: {0}")] + ParseError(String), } impl serde::de::Error for Error { diff --git a/src/grammar.pest b/src/grammar.pest deleted file mode 100644 index 2cbe2c4..0000000 --- a/src/grammar.pest +++ /dev/null @@ -1,93 +0,0 @@ -WHITESPACE = _{ " " | "\t" | "\r" | "\n" } -COMMENT = _{ "//" ~ (!"\n" ~ ANY)* } - -object = { - "{" - ~ object_value* - ~ "}" -} - -object_value = _{ - pair | spread -} - -spread = { - ".." ~ input -} - -array = { - "[" - ~ array_value* - ~ "]" -} - -array_value = _{ - value | spread -} - -pair = { path ~ "=" ~ value } - -path = ${ - path_seg - ~ ( "." ~ path_seg )* -} - -path_seg = _{ - quoted_path_seg | regular_path_seg -} - -quoted_path_seg = ${ "'" ~ quoted_path_val ~ "'" } -quoted_path_val = ${ quoted_path_char + } -quoted_path_char = _{ - !("'" | "\\") ~ ANY - | "\\" ~ "'" -} - -regular_path_seg = ${ path_char + } - -path_char = _{ !( WHITESPACE | "=" | "." ) ~ ANY } - -value = _{ object | array | input | string | float | integer | boolean | null } - -boolean = { "true" | "false" } -null = { "null" } - -string = ${ - "\"" ~ string_val ~ "\"" -} - -string_val = ${ (input | char)* } - -char = { - !("\"" | "\\") ~ ANY - | "\\" ~ ("\"" | "\\" | "n" | "r" | "t" | "$") - | "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4}) -} - -integer = ${ - hex_integer | decimal_integer -} - -decimal_integer = @{ - "-"? - ~ ("0" | ASCII_NONZERO_DIGIT ~ ("_"? ~ ASCII_DIGIT)*) -} - -hex_integer = @{ - "0x" ~ ASCII_HEX_DIGIT+ -} - -float = @{ - "-"? - ~ ("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*) - ~ ("." ~ ASCII_DIGIT*) - ~ (^"e" ~ ("+" | "-")? ~ ASCII_DIGIT+)? -} - -input = ${ !"\\" ~ "$" ~ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")+ } - -assignment = { input ~ "=" ~ value } - -assign_block = { "let" ~ "{" ~ assignment* ~ "}" ~ "in" } - -config = _{ SOI ~ assign_block? ~ object ~ EOI } diff --git a/src/lexer.rs b/src/lexer.rs new file mode 100644 index 0000000..0e602ce --- /dev/null +++ b/src/lexer.rs @@ -0,0 +1,276 @@ +use std::{ + borrow::Cow, + fmt, + num::{ParseFloatError, ParseIntError}, + str::FromStr, +}; +use thiserror::Error; + +use logos::{Logos, SpannedIter}; + +use crate::Integer; + +pub type Spanned = Result<(Loc, Tok, Loc), Error>; + +pub struct Lexer<'input> { + // instead of an iterator over characters, we have a token iterator + token_stream: SpannedIter<'input, Token<'input>>, +} + +impl<'input> Lexer<'input> { + pub fn new(input: &'input str) -> Self { + Self { + token_stream: Token::lexer(input).spanned(), + } + } +} + +impl<'input> Iterator for Lexer<'input> { + type Item = Spanned, usize, LexicalError>; + + fn next(&mut self) -> Option { + self.token_stream + .next() + .map(|(token, span)| Ok((span.start, token?, span.end))) + } +} + +#[derive(Debug, Default, Error, Clone, PartialEq, Eq)] +pub enum LexicalError { + #[error("Integer parsing error: {0}")] + InvalidInteger(#[from] ParseIntError), + #[error("Float parsing error: {0}")] + InvalidFloat(#[from] ParseFloatError), + #[default] + #[error("Encountered invalid token")] + InvalidToken, +} + +#[derive(Logos, Debug, Clone, PartialEq)] +#[logos(error = LexicalError)] +#[logos(skip r"[\s\t\r\n\f]+")] // Whitespace +#[logos(skip r"//[^\n\r]*[\n\r]*")] // Inline comments +#[logos(skip r"/\*([^*/]|\*[^/]|/[^*])*\*/")] // Multiline comments +pub enum Token<'input> { + #[token("let")] + Let, + + #[token("in")] + In, + + #[token("null")] + Null, + + #[token("=")] + Equals, + + #[token("{")] + OpenBrace, + + #[token("}")] + CloseBrace, + + #[token("[")] + OpenBracket, + + #[token("]")] + CloseBracket, + + #[token(".")] + Chain, + + #[token("..")] + Spread, + + #[token("false", |_| false)] + #[token("true", |_| true)] + Boolean(bool), + + #[regex(r"-?0x[0-9a-fA-F]+(_[0-9a-fA-F]+)*", |lex| parse_radix_integer(lex, 16))] + #[regex(r"-?0o[0-7]+(_[0-7]+)*", |lex| parse_radix_integer(lex, 8))] + #[regex(r"-?0b[01]+(_[01]+)*", |lex| parse_radix_integer(lex, 2))] + #[regex(r"-[0-9]+(_[0-9]+)*", parse_decimal::)] + #[regex(r"[0-9]+(_[0-9]+)*", parse_decimal::)] + Integer(Integer), + + #[regex(r"-?[0-9]+\.[0-9]*([eE][+-]?[0-9]+)?", |lex| lex.slice().parse::())] + Float(f64), + + #[regex(r"\$[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().trim_start_matches('$'))] + InputName(&'input str), + + #[token("\"", parse_literal)] + Literal(Vec>), + + #[regex(r#"'(?:[^'\\]|\\.)*'|[^\s.=0-9\[\]{}"'-][^\s.=\[\]{}"']*"#, |lex| lex.slice().trim_matches('\''))] + Key(&'input str), +} + +/// Parse normal decimal integer, removes underscores +fn parse_decimal<'input, N>( + lex: &mut logos::Lexer<'input, Token<'input>>, +) -> Result +where + N: Into + FromStr, +{ + lex.slice().replace("_", "").parse().map(N::into) +} + +/// Parse integer with specified radix, handling negative values and underscores +fn parse_radix_integer<'input>( + lex: &mut logos::Lexer<'input, Token<'input>>, + radix: u32, +) -> Result { + let input = lex.slice().replace("_", ""); + let is_negative = input.starts_with('-'); + let prefix_len = if is_negative { 3 } else { 2 }; // Skip "-0x"/"0x" etc. + let number_part = &input[prefix_len..]; + + if is_negative { + i64::from_str_radix(number_part, radix).map(|n| Integer::from(-n)) + } else { + u64::from_str_radix(number_part, radix).map(Integer::from) + } +} + +#[derive(Logos, Debug, PartialEq, Clone)] +enum StringContext<'input> { + #[token("\"")] + Quote, + #[regex(r#"[^\"$\\{]+"#)] + Content, + + #[token("\\n")] + NewlineEscape, + #[token("\\r")] + CarriageReturnEscape, + #[token("\\t")] + TabEscape, + #[token("\\\\")] + BackslashEscape, + #[token("\\\"")] + QuoteEscape, + #[token("\\$")] + DollarEscape, + #[token("\\{")] + OpenBraceEscape, + #[token("\\}")] + CloseBraceEscape, + #[regex(r"\\u\{[0-9a-fA-F]{4,6}\}")] + UnicodeEscape, + + #[regex(r"\$\{[a-zA-Z_][a-zA-Z0-9_]*\}", |lex| lex.slice())] + Interpolation(&'input str), +} + +#[derive(Debug, Clone, PartialEq)] +pub enum StringPart<'input> { + Literal(Cow<'input, str>), + Input(&'input str), +} + +fn parse_literal<'input>( + lex: &mut logos::Lexer<'input, Token<'input>>, +) -> Option>> { + let mut string_lex = lex.clone().morph::(); + + let mut parts = Vec::new(); + let mut current_literal = String::new(); + + while let Some(Ok(token)) = string_lex.next() { + match token { + StringContext::Quote => break, + StringContext::Content => current_literal.push_str(string_lex.slice()), + StringContext::Interpolation(input) => { + if !current_literal.is_empty() { + parts.push(StringPart::Literal(Cow::Owned(std::mem::take( + &mut current_literal, + )))); + } + + parts.push(StringPart::Input( + input.trim_start_matches("${").trim_end_matches('}'), + )) + } + StringContext::NewlineEscape => { + current_literal.push('\n'); + } + StringContext::CarriageReturnEscape => { + current_literal.push('\r'); + } + StringContext::TabEscape => { + current_literal.push('\t'); + } + StringContext::BackslashEscape => { + current_literal.push('\\'); + } + StringContext::QuoteEscape => { + current_literal.push('"'); + } + StringContext::DollarEscape => { + current_literal.push('$'); + } + StringContext::OpenBraceEscape => { + current_literal.push('{'); + } + StringContext::CloseBraceEscape => { + current_literal.push('}'); + } + StringContext::UnicodeEscape => { + let slice = string_lex.slice(); + let hex_part = &slice[3..slice.len() - 1]; + + if let Ok(code) = u32::from_str_radix(hex_part, 16) { + if let Some(unicode_char) = char::from_u32(code) { + current_literal.push(unicode_char); + continue; + } + } + + current_literal.push('\u{FFFD}'); + } + } + } + + if !current_literal.is_empty() { + parts.push(StringPart::Literal(Cow::Owned(std::mem::take( + &mut current_literal, + )))); + } + + *lex = string_lex.morph(); + + Some(parts) +} + +impl fmt::Display for Token<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Let => write!(f, "let"), + Self::In => write!(f, "in"), + Self::Null => write!(f, "null"), + Self::Equals => write!(f, "="), + Self::OpenBrace => write!(f, "{{"), + Self::CloseBrace => write!(f, "}}"), + Self::OpenBracket => write!(f, "["), + Self::CloseBracket => write!(f, "]"), + Self::Chain => write!(f, "."), + Self::Spread => write!(f, ".."), + Self::Literal(parts) => { + for part in parts { + match part { + StringPart::Literal(lit) => write!(f, "{lit}")?, + StringPart::Input(input) => write!(f, "{input}")?, + } + } + + Ok(()) + } + Self::Integer(int) => int.fmt(f), + Self::Float(float) => float.fmt(f), + Self::Boolean(bool) => bool.fmt(f), + Self::InputName(name) => name.fmt(f), + Self::Key(key) => key.fmt(f), + } + } +} diff --git a/src/lib.rs b/src/lib.rs index f06dfb9..4affd20 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,3 @@ -use indexmap::IndexMap; -use serde::Serialize; -use std::borrow::Cow; -use std::collections::HashMap; -use std::fmt::{Display, Formatter}; - -pub use crate::de::{from_slice, from_str}; -pub use crate::parser::{parse, Rule}; - -pub mod error; -mod parser; - mod de; #[cfg(any( feature = "lua51", @@ -23,50 +11,13 @@ mod lua; #[cfg(feature = "wasm")] mod wasm; -/// A map of input names and values. -/// The names include their `$` prefix. -pub type Inputs<'a> = HashMap<&'a str, Value<'a>>; - -/// A map of keys to their values. -pub type Object<'a> = IndexMap, Value<'a>>; +mod error; +pub(crate) mod value; -#[derive(Serialize, Debug, Clone)] -#[serde(untagged)] -pub enum Value<'a> { - /// Key/value map. Values can be mixed types. - Object(Object<'a>), - /// Array of values, can be mixed types. - Array(Vec>), - /// UTF-8 string - String(Cow<'a, str>), - /// 64-bit signed integer. - Integer(i64), - /// 64-bit (double precision) floating point number. - Float(f64), - /// true or false - Boolean(bool), - /// `null` literal. - /// - /// Takes an optional unit type as the `toml` crate - /// errors when encountering unit types, - /// but can handle `None` types. - Null(Option<()>), -} +pub mod ast; +pub mod lexer; +lalrpop_util::lalrpop_mod!(pub parser, "/corn.rs"); -impl Display for Value<'_> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}", - match self { - Value::Object(_) => "object", - Value::Array(_) => "array", - Value::String(_) => "string", - Value::Integer(_) => "integer", - Value::Float(_) => "float", - Value::Boolean(_) => "boolean", - Value::Null(_) => "null", - } - ) - } -} +pub use de::{from_str, parse, Deserializer}; +pub use error::{Error, Result}; +pub use value::{BorrowedObject, BorrowedValue, Integer, Object, Value}; diff --git a/src/lua.rs b/src/lua.rs index 539b992..c6ae350 100644 --- a/src/lua.rs +++ b/src/lua.rs @@ -1,14 +1,14 @@ use crate::Value; use mlua::prelude::*; -impl IntoLua for Value<'_> { +impl IntoLua for Value { fn into_lua(self, lua: &Lua) -> LuaResult { lua.to_value(&self) } } fn lua_parse(lua: &Lua, config: String) -> LuaResult { - let res = crate::parse(&config); + let res = crate::from_str::(&config); match res { Ok(v) => Ok(lua.to_value(&v)?), Err(e) => Err(LuaError::RuntimeError(e.to_string())), diff --git a/src/parser.rs b/src/parser.rs deleted file mode 100644 index 248743f..0000000 --- a/src/parser.rs +++ /dev/null @@ -1,425 +0,0 @@ -use indexmap::IndexMap; -use std::borrow::Cow; -use std::collections::HashMap; -use std::env::var; -use std::fmt::Formatter; - -use pest::iterators::Pair; -use pest::Parser; - -use crate::error::{Error, Result}; -use crate::{Inputs, Object, Value}; - -#[derive(pest_derive::Parser)] -#[grammar = "grammar.pest"] -pub struct AstParser; - -impl std::fmt::Display for Rule { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{self:?}") - } -} - -struct CornParser<'a> { - input_block: Option>, - inputs: Inputs<'a>, -} - -impl<'a> CornParser<'a> { - pub fn new(input_block: Option>) -> Self { - let inputs = HashMap::new(); - Self { - input_block, - inputs, - } - } - - pub fn parse(mut self, object_block: Pair<'a, Rule>) -> Result> { - if let Some(input_block) = self.input_block.take() { - self.parse_assign_block(input_block)?; - } - - let value_block = self.parse_object(object_block)?; - Ok(Value::Object(value_block)) - } - - /// Parses a pair of tokens (marked as a `Rule`) into a `Value`. - fn parse_value(&self, pair: Pair<'a, Rule>) -> Result> { - match pair.as_rule() { - Rule::object => Ok(Value::Object(self.parse_object(pair)?)), - Rule::array => Ok(Value::Array(self.parse_array(pair)?)), - Rule::string => Ok(Value::String(self.parse_string(pair)?)), - Rule::integer => Ok(Value::Integer(Self::parse_integer(pair))), - Rule::float => Ok(Value::Float(Self::parse_float(&pair))), - Rule::boolean => Ok(Value::Boolean(Self::parse_bool(&pair))), - Rule::null => Ok(Value::Null(None)), - Rule::input => { - let key = pair.as_str(); - self.get_input(key) - } - _ => unreachable!(), - } - } - - fn parse_bool(pair: &Pair<'_, Rule>) -> bool { - assert_eq!(pair.as_rule(), Rule::boolean); - match pair.as_str() { - "true" => true, - "false" => false, - _ => unreachable!(), - } - } - - fn parse_integer(pair: Pair<'_, Rule>) -> i64 { - assert_eq!(pair.as_rule(), Rule::integer); - let sub_pair = pair - .into_inner() - .next() - .expect("integers should contain a sub-rule of their type"); - - match sub_pair.as_rule() { - Rule::decimal_integer => sub_pair - .as_str() - .replace('_', "") - .parse() - .expect("decimal integer rules should match valid rust integers"), - Rule::hex_integer => i64::from_str_radix(&sub_pair.as_str()[2..], 16) - .expect("hex integer rules contain valid hex values"), - _ => unreachable!(), - } - } - - fn parse_float(pair: &Pair<'_, Rule>) -> f64 { - assert_eq!(pair.as_rule(), Rule::float); - pair.as_str() - .parse() - .expect("float rules should match valid rust floats") - } - - /// Collects each `char` in a `Rule::string` - /// to form a single `String`. - fn parse_string(&self, pair: Pair<'a, Rule>) -> Result> { - assert_eq!(pair.as_rule(), Rule::string); - - let mut full_string = String::new(); - - let pairs = pair - .into_inner() - .next() - .expect("string rules should contain a valid string value") - .into_inner(); - - for pair in pairs { - match pair.as_rule() { - Rule::char => full_string.push(Self::parse_char(&pair)), - Rule::input => { - let input_name = pair.as_str(); - let value = self.get_input(input_name)?; - match value { - Value::String(value) => full_string.push_str(&value), - _ => return Err(Error::InvalidInterpolationError(input_name.to_string())), - } - } - _ => unreachable!(), - }; - } - - let full_string = if full_string.contains('\n') { - trim_multiline_string(&full_string) - } else { - full_string - }; - - Ok(Cow::Owned(full_string)) - } - - fn parse_char(pair: &Pair<'a, Rule>) -> char { - let str = pair.as_str(); - let mut chars = str.chars(); - - let first_char = chars.next().expect("character to exist"); - if first_char != '\\' { - return first_char; - } - - let second_char = chars.next().expect("character to exist"); - if second_char != 'u' { - return match second_char { - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - '"' => '\"', - '$' => '$', - '\\' => '\\', - _ => unreachable!(), - }; - } - - let num = - u32::from_str_radix(&str[3..], 16).expect("valid hex characters to exist after \\u"); - char::from_u32(num).unwrap_or('\u{FFFD}') - } - - /// Parses each rule in a `Rule::array` - /// to form a vector of `Value`s. - fn parse_array(&self, block: Pair<'a, Rule>) -> Result>> { - assert_eq!(block.as_rule(), Rule::array); - - let mut arr = vec![]; - - for pair in block.into_inner() { - match pair.as_rule() { - Rule::spread => { - let input = pair - .into_inner() - .next() - .expect("spread operators should contain an input"); - - let input_name = input.as_str(); - let value = self.parse_value(input)?; - - match value { - Value::Array(other) => arr.extend(other), - _ => return Err(Error::InvalidSpreadError(input_name.to_string())), - } - } - _ => arr.push(self.parse_value(pair)?), - }; - } - - Ok(arr) - } - - /// Parses each key/value pair in a `Rule::object` - /// to form a `IndexMap` of Values. - /// - /// An `IndexMap` is used to ensure keys - /// always output in the same order. - fn parse_object(&self, block: Pair<'a, Rule>) -> Result> { - assert_eq!(block.as_rule(), Rule::object); - - let mut obj = IndexMap::new(); - - for pair in block.into_inner() { - match pair.as_rule() { - Rule::pair => { - let mut path_rules = pair.into_inner(); - - let path = path_rules - .next() - .expect("object pairs should contain a key"); - - let paths = Self::parse_path(path); - - let value = self.parse_value( - path_rules - .next() - .expect("object pairs should contain a value"), - )?; - - obj = Self::add_at_path(obj, &paths, value)?; - } - Rule::spread => { - let input = pair - .into_inner() - .next() - .expect("spread operators should contain an input"); - - let input_name = input.as_str(); - let value = self.parse_value(input)?; - - match value { - Value::Object(other) => obj.extend(other), - _ => return Err(Error::InvalidSpreadError(input_name.to_string())), - } - } - _ => unreachable!(), - } - } - - Ok(obj) - } - - fn parse_path(path: Pair) -> Vec> { - path.into_inner() - .map(|pair| match pair.as_rule() { - Rule::regular_path_seg => Cow::Borrowed(pair.as_str()), - Rule::quoted_path_seg => Cow::Owned( - pair.into_inner() - .next() - .expect("quoted paths should contain an inner value") - .as_str() - .replace('\\', ""), - ), - _ => unreachable!(), - }) - .collect::>() - } - - /// Adds `Value` at the `path` in `obj`. - /// - /// `path` is an array where each entry represents another object key, - /// for example `foo.bar` is represented as `["foo", "bar"]`. - /// - /// Objects are created up to the required depth recursively. - fn add_at_path( - mut obj: Object<'a>, - path: &[Cow<'a, str>], - value: Value<'a>, - ) -> Result> { - let (part, path_rest) = path - .split_first() - .expect("paths should contain at least 1 segment"); - - if path_rest.is_empty() { - obj.insert(part.clone(), value); - return Ok(obj); - } - - let child_obj = obj - .shift_remove(part) - .unwrap_or_else(|| Value::Object(IndexMap::new())); - - match child_obj { - Value::Object(map) => { - obj.insert( - part.clone(), - Value::Object(Self::add_at_path(map, path_rest, value)?), - ); - - Ok(obj) - } - _ => Err(Error::InvalidPathError(path.join("."))), - } - } - - /// Parses the `let { } in` block at the start of files. - /// Each input is inserted into into `self.inputs`. - fn parse_assign_block(&mut self, block: Pair<'a, Rule>) -> Result<()> { - assert_eq!(block.as_rule(), Rule::assign_block); - - for pair in block.into_inner() { - let mut assign_rules = pair.into_inner(); - let name = assign_rules - .next() - .expect("input assignments should have a name") - .as_str(); - - let value = self.parse_value( - assign_rules - .next() - .expect("input assignments should have a value"), - )?; - - self.inputs.insert(name, value); - } - - Ok(()) - } - - /// Attempts to get an input value from the `inputs` map. - /// If the `key` starts with `$env_` the system environment variables will be consulted first. - fn get_input(&self, key: &'a str) -> Result> { - if let Some(env_name) = key.strip_prefix("$env_") { - let var = var(env_name); - - if let Ok(var) = var { - return Ok(Value::String(Cow::Owned(var))); - } - } - - if let Some(value) = self.inputs.get(key) { - Ok(value.clone()) - } else { - Err(Error::InputResolveError(key.to_string())) - } - } -} - -/// Takes a multiline string and trims the maximum amount of -/// whitespace at the start of each line -/// while preserving formatting. -/// -/// Based on code from `indoc` crate: -/// -fn trim_multiline_string(string: &str) -> String { - let ignore_first_line = string.starts_with('\n') || string.starts_with("\r\n"); - - let spaces = string - .lines() - .skip(1) - .map(|line| line.chars().take_while(char::is_ascii_whitespace).count()) - .min() - .unwrap_or_default(); - - let mut result = String::with_capacity(string.len()); - for (i, line) in string.lines().enumerate() { - if i > 1 || (i == 1 && !ignore_first_line) { - result.push('\n'); - } - if i == 0 { - // Do not un-indent anything on same line as opening quote - result.push_str(line); - } else if line.len() > spaces { - // Whitespace-only lines may have fewer than the number of spaces - // being removed - result.push_str(&line[spaces..]); - } - } - result -} - -/// Parses the input string into a `Config` -/// containing the resolved inputs -/// and a map of values representing the top-level object. -/// -/// # Examples -/// -/// ```rust -/// use corn::parse; -/// -/// let corn = "{foo = 42}"; -/// -/// let config = parse(corn).unwrap(); -/// let json = serde_json::to_string(&config).unwrap(); -/// -/// assert_eq!(json, "{\"foo\":42}"); -/// ``` -/// -/// # Errors -/// -/// Will fail if the input contains a syntax error. -/// Will fail if the input contains invalid Corn for another reason, -/// including references to undefined inputs or dot-notation for non-object values. -/// Will fail if the input cannot be deserialized for any reaon. -/// -/// Any of the above will return a specific error type with details. -/// -/// # Panics -/// -/// If the internal AST parser produces a tree in an invalid structure, -/// the function will panic. -/// This indicates a severe error in the library and should never occur. -pub fn parse(file: &str) -> Result { - let rules = AstParser::parse(Rule::config, file); - - match rules { - Ok(mut rules) => { - let first_block = rules.next().expect("should be at least 1 rule"); - - match first_block.as_rule() { - Rule::assign_block => { - let parser = CornParser::new(Some(first_block)); - let object_block = rules.next().expect("should always be an object block"); - parser.parse(object_block) - } - Rule::object => { - let parser = CornParser::new(None); - parser.parse(first_block) - } - _ => unreachable!(), - } - } - Err(error) => Err(Error::ParserError(Box::new(error))), - } -} diff --git a/src/value/de.rs b/src/value/de.rs new file mode 100644 index 0000000..5b80b29 --- /dev/null +++ b/src/value/de.rs @@ -0,0 +1,114 @@ +use serde::{de::Visitor, Deserialize}; + +use crate::{Object, Value}; + +impl<'de> Deserialize<'de> for Value { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + struct ValueVisitor; + + impl<'de> Visitor<'de> for ValueVisitor { + type Value = Value; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("any valid Corn value") + } + + #[inline] + fn visit_bool(self, v: bool) -> Result + where + E: serde::de::Error, + { + Ok(Value::Boolean(v)) + } + + #[inline] + fn visit_i64(self, v: i64) -> Result + where + E: serde::de::Error, + { + Ok(Value::Integer(v.into())) + } + + #[inline] + fn visit_u64(self, v: u64) -> Result + where + E: serde::de::Error, + { + Ok(Value::Integer(v.into())) + } + + fn visit_f64(self, v: f64) -> Result + where + E: serde::de::Error, + { + Ok(Value::Float(v)) + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + self.visit_string(String::from(v)) + } + + fn visit_string(self, v: String) -> Result + where + E: serde::de::Error, + { + Ok(Value::String(v)) + } + + fn visit_none(self) -> Result + where + E: serde::de::Error, + { + Ok(Value::Null) + } + + fn visit_some(self, deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + Deserialize::deserialize(deserializer) + } + + fn visit_unit(self) -> Result + where + E: serde::de::Error, + { + Ok(Value::Null) + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: serde::de::SeqAccess<'de>, + { + let mut vec = Vec::new(); + + while let Some(elem) = seq.next_element()? { + vec.push(elem); + } + + Ok(Value::Array(vec)) + } + + fn visit_map(self, mut map: A) -> Result + where + A: serde::de::MapAccess<'de>, + { + let mut dict = Object::new(); + + while let Some((key, value)) = map.next_entry()? { + dict.insert(key, value); + } + + Ok(Value::Object(dict)) + } + } + + deserializer.deserialize_any(ValueVisitor) + } +} diff --git a/src/value/integer.rs b/src/value/integer.rs new file mode 100644 index 0000000..a9acd9b --- /dev/null +++ b/src/value/integer.rs @@ -0,0 +1,142 @@ +use serde::{ + de::{self, Visitor}, + forward_to_deserialize_any, Deserialize, Serialize, +}; +use std::fmt::{self, Debug, Display}; + +use crate::Error; + +#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] +#[repr(transparent)] +pub struct Integer { + inner: IntegerType, +} + +impl Debug for Integer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Integer({})", self) + } +} + +impl Display for Integer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.inner { + IntegerType::Signed(n) => f.write_str(itoa::Buffer::new().format(n)), + IntegerType::Unsigned(n) => f.write_str(itoa::Buffer::new().format(n)), + } + } +} + +#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] +enum IntegerType { + Signed(i64), + Unsigned(u64), +} + +impl From for Integer { + fn from(value: i64) -> Self { + Self { + inner: IntegerType::Signed(value), + } + } +} + +impl From for Integer { + fn from(value: u64) -> Self { + Self { + inner: IntegerType::Unsigned(value), + } + } +} + +impl Integer { + pub const fn is_i64(&self) -> bool { + match self.inner { + IntegerType::Unsigned(n) => n <= i64::MAX as u64, + IntegerType::Signed(_) => true, + } + } + + pub const fn is_u64(&self) -> bool { + matches!(self.inner, IntegerType::Unsigned(_)) + } + + pub const fn as_i64(&self) -> Option { + match self.inner { + IntegerType::Signed(n) => Some(n), + IntegerType::Unsigned(n) => { + if n <= i64::MAX as u64 { + Some(n as i64) + } else { + None + } + } + } + } + + pub const fn as_u64(&self) -> Option { + match self.inner { + IntegerType::Unsigned(n) => Some(n), + IntegerType::Signed(_) => None, + } + } +} + +impl Serialize for Integer { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self.inner { + IntegerType::Signed(integer) => serializer.serialize_i64(integer), + IntegerType::Unsigned(integer) => serializer.serialize_u64(integer), + } + } +} + +impl<'de> Deserialize<'de> for Integer { + fn deserialize(deserializer: D) -> Result + where + D: de::Deserializer<'de>, + { + struct IntegerVisitor; + + impl<'de> Visitor<'de> for IntegerVisitor { + type Value = Integer; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("an Integer") + } + + fn visit_i64(self, value: i64) -> Result { + Ok(value.into()) + } + + fn visit_u64(self, value: u64) -> Result { + Ok(value.into()) + } + } + + deserializer.deserialize_any(IntegerVisitor) + } +} + +impl<'de> de::Deserializer<'de> for Integer { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + match self.inner { + IntegerType::Signed(integer) => visitor.visit_i64(integer), + IntegerType::Unsigned(integer) => visitor.visit_u64(integer), + } + } + + forward_to_deserialize_any! { + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf option unit unit_struct newtype_struct seq tuple + tuple_struct map struct enum identifier ignored_any + } +} diff --git a/src/value/mod.rs b/src/value/mod.rs new file mode 100644 index 0000000..ba0a051 --- /dev/null +++ b/src/value/mod.rs @@ -0,0 +1,313 @@ +use std::borrow::Cow; + +use indexmap::IndexMap; + +mod de; +mod integer; +mod ser; + +pub use integer::Integer; + +use crate::Error; + +/// Object: Key-value collection that preserves insertion order +pub type Object = IndexMap; +pub type BorrowedObject<'input> = IndexMap, BorrowedValue<'input>>; + +/// Represents a Corn configuration value. +/// +/// This enum encompasses all possible value types in the Corn language specification +#[derive(Debug, Clone, PartialEq)] +pub enum Value { + /// A UTF-8 string value + String(String), + /// A 64-bit signed integer + Integer(Integer), + /// A 64-bit floating point number + Float(f64), + /// A boolean value (true or false) + Boolean(bool), + /// A key-value collection that preserves insertion order + Object(Object), + /// An ordered collection of values + Array(Vec), + /// Represents the absence of a value + Null, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum BorrowedValue<'input> { + String(Cow<'input, str>), + Integer(Integer), + Float(f64), + Boolean(bool), + Null, + Array(Vec>), + Object(BorrowedObject<'input>), +} + +impl BorrowedValue<'_> { + pub fn into_value(self) -> Value { + match self { + BorrowedValue::String(string) => Value::String(string.into_owned()), + BorrowedValue::Integer(integer) => Value::Integer(integer), + BorrowedValue::Float(float) => Value::Float(float), + BorrowedValue::Boolean(boolean) => Value::Boolean(boolean), + BorrowedValue::Null => Value::Null, + BorrowedValue::Array(array) => { + Value::Array(array.into_iter().map(Value::from).collect()) + } + BorrowedValue::Object(object) => Value::Object( + object + .into_iter() + .map(|(k, v)| (k.into_owned(), Value::from(v))) + .collect(), + ), + } + } + + pub const fn as_type(&self) -> &'static str { + match self { + Self::String(_) => "String", + Self::Integer(_) => "Integer", + Self::Float(_) => "Float", + Self::Boolean(_) => "Boolean", + Self::Null => "Null", + Self::Array(_) => "Array", + Self::Object(_) => "Object", + } + } + + pub(crate) fn invalid_type(&self, exp: &'static str) -> Error { + Error::DeserializationError(format!("Invalid type: {}, expected {exp}", self.as_type())) + } +} + +impl From> for Value { + fn from(entry: BorrowedValue<'_>) -> Self { + entry.into_value() + } +} + +impl Value { + /// Returns true if the value is a String. + pub const fn is_string(&self) -> bool { + matches!(self, Self::String(_)) + } + + /// Returns true if the value is an Integer. + pub const fn is_integer(&self) -> bool { + matches!(self, Self::Integer(_)) + } + + /// Returns true if the value is a Float. + pub const fn is_float(&self) -> bool { + matches!(self, Self::Float(_)) + } + + /// Returns true if the value is a Boolean. + pub const fn is_boolean(&self) -> bool { + matches!(self, Self::Boolean(_)) + } + + /// Returns true if the value is an Object. + pub const fn is_object(&self) -> bool { + matches!(self, Self::Object(_)) + } + + /// Returns true if the value is an Array. + pub const fn is_array(&self) -> bool { + matches!(self, Self::Array(_)) + } + + /// Returns true if the value is Null. + pub const fn is_null(&self) -> bool { + matches!(self, Self::Null) + } + + /// Returns the inner String if this value is a String, otherwise None. + pub fn as_string(&self) -> Option<&String> { + match self { + Self::String(s) => Some(s), + _ => None, + } + } + + /// Returns the inner Integer if this value is an Integer, otherwise None. + pub const fn as_integer(&self) -> Option<&Integer> { + match self { + Self::Integer(integer) => Some(integer), + _ => None, + } + } + + /// Returns the inner Float if this value is a Float, otherwise None. + pub const fn as_float(&self) -> Option<&f64> { + match self { + Self::Float(f) => Some(f), + _ => None, + } + } + + /// Returns the inner Boolean if this value is a Boolean, otherwise None. + pub const fn as_boolean(&self) -> Option<&bool> { + match self { + Self::Boolean(b) => Some(b), + _ => None, + } + } + + /// Returns the inner Object if this value is an Object, otherwise None. + pub fn as_object(&self) -> Option<&Object> { + match self { + Self::Object(obj) => Some(obj), + _ => None, + } + } + + /// Returns the inner Array if this value is an Array, otherwise None. + pub fn as_array(&self) -> Option<&Vec> { + match self { + Self::Array(arr) => Some(arr), + _ => None, + } + } + + /// Returns a mutable reference to the inner String if this value is a String, otherwise None. + pub fn as_string_mut(&mut self) -> Option<&mut String> { + match self { + Self::String(s) => Some(s), + _ => None, + } + } + + /// Returns a mutable reference to the inner Integer if this value is an Integer, otherwise None. + pub fn as_integer_mut(&mut self) -> Option<&mut Integer> { + match self { + Self::Integer(i) => Some(i), + _ => None, + } + } + + /// Returns a mutable reference to the inner Float if this value is a Float, otherwise None. + pub fn as_float_mut(&mut self) -> Option<&mut f64> { + match self { + Self::Float(f) => Some(f), + _ => None, + } + } + + /// Returns a mutable reference to the inner Boolean if this value is a Boolean, otherwise None. + pub fn as_boolean_mut(&mut self) -> Option<&mut bool> { + match self { + Self::Boolean(b) => Some(b), + _ => None, + } + } + + /// Returns a mutable reference to the inner Object if this value is an Object, otherwise None. + pub fn as_object_mut(&mut self) -> Option<&mut Object> { + match self { + Self::Object(obj) => Some(obj), + _ => None, + } + } + + /// Returns a mutable reference to the inner Array if this value is an Array, otherwise None. + pub fn as_array_mut(&mut self) -> Option<&mut Vec> { + match self { + Self::Array(arr) => Some(arr), + _ => None, + } + } + + /// Takes the inner String if this value is a String, otherwise None. + pub fn take_string(self) -> Option { + match self { + Self::String(s) => Some(s), + _ => None, + } + } + + /// Takes the inner Integer if this value is an Integer, otherwise None. + pub fn take_integer(self) -> Option { + match self { + Self::Integer(i) => Some(i), + _ => None, + } + } + + /// Takes the inner Float if this value is a Float, otherwise None. + pub fn take_float(self) -> Option { + match self { + Self::Float(f) => Some(f), + _ => None, + } + } + + /// Takes the inner Boolean if this value is a Boolean, otherwise None. + pub fn take_boolean(self) -> Option { + match self { + Self::Boolean(b) => Some(b), + _ => None, + } + } + + /// Takes the inner Object if this value is an Object, otherwise None. + pub fn take_object(self) -> Option { + match self { + Self::Object(obj) => Some(obj), + _ => None, + } + } + + /// Takes the inner Array if this value is an Array, otherwise None. + pub fn take_array(self) -> Option> { + match self { + Self::Array(arr) => Some(arr), + _ => None, + } + } + + /// Returns true if the value is empty. + /// An empty value is an empty String, empty Object, empty Array, or Null. + pub fn is_empty(&self) -> bool { + match self { + Self::String(s) => s.is_empty(), + Self::Object(obj) => obj.is_empty(), + Self::Array(arr) => arr.is_empty(), + Self::Null => true, + _ => false, + } + } + + /// Returns the number of elements in this Value. + /// For objects this is the number of key-value pairs, for arrays it's the number of elements, + /// for strings it's the string length, and for other types it's 0. + pub fn len(&self) -> usize { + match self { + Self::String(s) => s.len(), + Self::Object(obj) => obj.len(), + Self::Array(arr) => arr.len(), + _ => 0, + } + } + + /// Get a reference to a value in an object by key. + /// Returns None if the value is not an object or if the key doesn't exist. + pub fn get(&self, key: &str) -> Option<&Value> { + match self { + Self::Object(obj) => obj.get(key), + _ => None, + } + } + + /// Get a reference to a value in an array by index. + /// Returns None if the value is not an array or if the index is out of bounds. + pub fn get_index(&self, index: usize) -> Option<&Value> { + match self { + Self::Array(arr) => arr.get(index), + _ => None, + } + } +} diff --git a/src/value/ser.rs b/src/value/ser.rs new file mode 100644 index 0000000..aee31cc --- /dev/null +++ b/src/value/ser.rs @@ -0,0 +1,57 @@ +use serde::Serialize; + +use crate::{BorrowedValue, Value}; + +impl Serialize for Value { + #[inline] + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + Self::String(s) => serializer.serialize_str(s), + Self::Integer(i) => i.serialize(serializer), + Self::Float(f) => f.serialize(serializer), + Self::Boolean(v) => serializer.serialize_bool(*v), + Self::Object(obj) => { + use serde::ser::SerializeMap; + let mut map = serializer.serialize_map(Some(obj.len()))?; + + for (k, v) in obj { + map.serialize_entry(k, v)?; + } + + map.end() + } + Self::Array(v) => v.serialize(serializer), + Self::Null => serializer.serialize_none(), + } + } +} + +impl Serialize for BorrowedValue<'_> { + #[inline] + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + Self::String(s) => serializer.serialize_str(s), + Self::Integer(i) => i.serialize(serializer), + Self::Float(f) => f.serialize(serializer), + Self::Boolean(v) => serializer.serialize_bool(*v), + Self::Object(obj) => { + use serde::ser::SerializeMap; + let mut map = serializer.serialize_map(Some(obj.len()))?; + + for (k, v) in obj { + map.serialize_entry(k, v)?; + } + + map.end() + } + Self::Array(v) => v.serialize(serializer), + Self::Null => serializer.serialize_none(), + } + } +} diff --git a/tests/borrowed_tests.rs b/tests/borrowed_tests.rs new file mode 100644 index 0000000..40266be --- /dev/null +++ b/tests/borrowed_tests.rs @@ -0,0 +1,191 @@ +use corn::from_str; +use serde::Deserialize; +use std::borrow::Cow; +use std::fs; + +// Borrowed structure for basic string test +#[derive(Deserialize, Debug, PartialEq)] +struct BorrowedBasic<'a> { + #[serde(borrow)] + foo: Cow<'a, str>, +} + +// Borrowed structure for string test with multiple fields +#[derive(Deserialize, Debug, PartialEq)] +struct BorrowedString<'a> { + #[serde(borrow)] + foo: Cow<'a, str>, + #[serde(borrow)] + bar: Cow<'a, str>, + #[serde(borrow)] + baz: Cow<'a, str>, + #[serde(borrow)] + qux: Cow<'a, str>, +} + +// Borrowed structure using Cow for flexible ownership +#[derive(Deserialize, Debug, PartialEq)] +struct BorrowedStringCow<'a> { + #[serde(borrow)] + foo: Cow<'a, str>, + #[serde(borrow)] + bar: Cow<'a, str>, + #[serde(borrow)] + baz: Cow<'a, str>, + #[serde(borrow)] + qux: Cow<'a, str>, +} + +// Borrowed nested structure +#[derive(Deserialize, Debug, PartialEq)] +struct BorrowedNested<'a> { + #[serde(borrow)] + name: BorrowedName<'a>, + age: i64, + #[serde(borrow)] + gender: Cow<'a, str>, +} + +#[derive(Deserialize, Debug, PartialEq)] +struct BorrowedName<'a> { + #[serde(borrow)] + first: Cow<'a, str>, + #[serde(borrow)] + last: Cow<'a, str>, + #[serde(borrow)] + full: Cow<'a, str>, +} + +// Mixed borrowed and owned fields +#[derive(Deserialize, Debug, PartialEq)] +struct BorrowedMixed<'a> { + #[serde(borrow)] + name: BorrowedNameMixed<'a>, + age: i64, + employment: BorrowedEmployment<'a>, + #[serde(borrow)] + gender: Cow<'a, str>, +} + +#[derive(Deserialize, Debug, PartialEq)] +struct BorrowedNameMixed<'a> { + #[serde(borrow)] + first: Cow<'a, str>, + #[serde(borrow)] + last: Cow<'a, str>, + // full name might be interpolated, so we use Cow + #[serde(borrow)] + full: Cow<'a, str>, +} + +#[derive(Deserialize, Debug, PartialEq)] +struct BorrowedEmployment<'a> { + employed: bool, + #[serde(borrow)] + name: Cow<'a, str>, + #[serde(rename = "sinceYear")] + since_year: i64, +} + +#[test] +fn test_borrowed_basic() { + let root_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); + let input = fs::read_to_string(format!("{root_dir}/assets/inputs/basic.corn")).unwrap(); + + let config: BorrowedBasic = from_str(&input).unwrap(); + + assert_eq!(config.foo, "bar"); +} + +#[test] +fn test_borrowed_string() { + let root_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); + let input = fs::read_to_string(format!("{root_dir}/assets/inputs/string.corn")).unwrap(); + + let config: BorrowedString = from_str(&input).unwrap(); + + assert_eq!(config.foo, "bar"); + assert_eq!(config.bar, "\"\\\n\r\t"); + assert_eq!(config.baz, "a"); // Unicode escape \u{0061} + assert_eq!(config.qux, ""); +} + +#[test] +fn test_borrowed_string_cow() { + let root_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); + let input = fs::read_to_string(format!("{root_dir}/assets/inputs/string.corn")).unwrap(); + + let config: BorrowedStringCow = from_str(&input).unwrap(); + + assert_eq!(config.foo, "bar"); + assert_eq!(config.bar, "\"\\\n\r\t"); + assert_eq!(config.baz, "a"); + assert_eq!(config.qux, ""); +} + +#[test] +fn test_borrowed_nested() { + let root_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); + let input = fs::read_to_string(format!("{root_dir}/assets/inputs/complex.corn")).unwrap(); + + let config: BorrowedNested = from_str(&input).unwrap(); + + assert_eq!(config.name.first, "John"); + assert_eq!(config.name.last, "Smith"); + assert_eq!(config.name.full, "John Smith"); + assert_eq!(config.age, 32); + assert_eq!(config.gender, "M"); +} + +#[test] +fn test_borrowed_mixed() { + let root_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); + let input = fs::read_to_string(format!("{root_dir}/assets/inputs/complex.corn")).unwrap(); + + let config: BorrowedMixed = from_str(&input).unwrap(); + + assert_eq!(config.name.first, "John"); + assert_eq!(config.name.last, "Smith"); + assert_eq!(config.name.full, "John Smith"); + assert_eq!(config.age, 32); + assert_eq!(config.employment.employed, true); + assert_eq!(config.employment.name, "Postman"); + assert_eq!(config.employment.since_year, 2019); + assert_eq!(config.gender, "M"); +} + +// Test that demonstrates zero-copy behavior with lifetime constraints +#[test] +fn test_borrowed_lifetime_constraint() { + let root_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); + let input = fs::read_to_string(format!("{root_dir}/assets/inputs/basic.corn")).unwrap(); + + let config: BorrowedBasic; + + { + config = from_str(&input).unwrap(); + assert_eq!(config.foo, "bar"); + } + + // config can still be used here because input is still alive + assert_eq!(config.foo, "bar"); +} + +// Test array with borrowed string elements +#[derive(Deserialize, Debug, PartialEq)] +struct BorrowedArray<'a> { + #[serde(borrow)] + foo: Vec>, +} + +#[test] +fn test_borrowed_array() { + let test_input = r#"{ foo = ["hello" "world" "test"] }"#; + + let config: BorrowedArray = from_str(test_input).unwrap(); + + assert_eq!(config.foo.len(), 3); + assert_eq!(config.foo[0], "hello"); + assert_eq!(config.foo[1], "world"); + assert_eq!(config.foo[2], "test"); +} diff --git a/tests/de_tests.rs b/tests/de_tests.rs index b039970..a337801 100644 --- a/tests/de_tests.rs +++ b/tests/de_tests.rs @@ -237,7 +237,7 @@ struct ComplexParentsFatherBirthday { #[derive(Deserialize, Debug, PartialEq)] struct ComplexKeys { - #[serde(rename = "!\"£$%^&*()_")] + #[serde(rename = "!£$%^&*()_")] symbols: i64, #[serde(rename = "apple-pie")] apple_pie: ComplexKeysApplePie, @@ -289,9 +289,42 @@ struct InputDob { #[derive(Deserialize, Debug, PartialEq)] struct Integer { - foo: i64, - bar: i64, - baz: i64, + // Basic decimal integers + positive_decimal: i64, + negative_decimal: i64, + zero: i64, + + // Decimal with underscores + large_decimal: i64, + negative_large_decimal: i64, + decimal_with_underscores: i64, + + // Hexadecimal integers + hex_lowercase: i64, + hex_uppercase: i64, + hex_mixed: i64, + negative_hex: i64, + hex_with_underscores: i64, + negative_hex_underscores: i64, + hex_single_digit: i64, + hex_zero: i64, + + // Octal integers + octal_basic: i64, + octal_zero: i64, + negative_octal: i64, + octal_with_underscores: i64, + negative_octal_underscores: i64, + octal_single_digit: i64, + + // Binary integers + binary_basic: i64, + binary_zero: i64, + negative_binary: i64, + binary_with_underscores: i64, + negative_binary_underscores: i64, + binary_single_digit: i64, + binary_all_ones: i64, } #[derive(Deserialize, Debug, PartialEq)] @@ -443,6 +476,14 @@ struct ValueAfterTable { qux: bool, } +#[derive(Deserialize, Debug, PartialEq)] +struct Unicode { + plane0: String, + plane1: String, + plane2: String, + plane3: String, +} + generate_eq_tests!( (array, Array), (basic, Basic), @@ -594,3 +635,18 @@ fn null_unit() { assert_eq!(config, json_config); } + +#[test] +fn unicode() { + let test_name = "unicode"; + let root_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); + + let input = fs::read_to_string(format!("{root_dir}/assets/inputs/{test_name}.corn")).unwrap(); + let config = from_str::(&input).unwrap(); + + let json_input = + fs::read_to_string(format!("{root_dir}/assets/outputs/json/{test_name}.json")).unwrap(); + let json_config = serde_json::from_str(&json_input).unwrap(); + + assert_eq!(config, json_config); +} diff --git a/tests/parser_tests.rs b/tests/parser_tests.rs index 7b6d8ff..4017816 100644 --- a/tests/parser_tests.rs +++ b/tests/parser_tests.rs @@ -1,5 +1,3 @@ -extern crate core; - use corn::parse; use paste::paste; use std::fs; @@ -100,6 +98,7 @@ generate_eq_tests!( spread, string, string_interpolation, + unicode, value_after_table, very_compact );