From a68cf12c1c6b690577b1e233bb5cae74d3aa7f7b Mon Sep 17 00:00:00 2001 From: Seamus Abshere Date: Tue, 14 Oct 2025 15:08:34 -0400 Subject: [PATCH 1/4] v1.1.0 - scrubcsv --output-stats-to-file --- Cargo.lock | 733 ++++++++++++++++++++++++++++++++-------- scrubcsv/CHANGELOG.md | 13 + scrubcsv/Cargo.toml | 16 +- scrubcsv/src/errors.rs | 4 +- scrubcsv/src/main.rs | 66 ++-- scrubcsv/src/util.rs | 2 +- scrubcsv/tests/tests.rs | 44 +++ 7 files changed, 714 insertions(+), 164 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5a513c4..20f0d27 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,27 +1,27 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" -version = "0.17.0" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9ecd88a8c8378ca913a680cd98f0f13ac67383d35993f86c90a70e3f137816b" +checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" dependencies = [ "gimli", ] [[package]] -name = "adler" -version = "1.0.2" +name = "adler2" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "aho-corasick" -version = "0.7.18" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] @@ -35,11 +35,61 @@ dependencies = [ "winapi", ] +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +dependencies = [ + "windows-sys 0.60.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.60.2", +] + [[package]] name = "anyhow" -version = "1.0.57" +version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f9b8508dccb7687a1d6c4ce66b2b0ecef467c94667de27d8d7fe1f8d2a9cdc" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" [[package]] name = "atty" @@ -54,17 +104,17 @@ dependencies = [ [[package]] name = "backtrace" -version = "0.3.65" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11a17d453482a265fd5f8479f2a3f405566e6ca627837aaddb85af8b1ab8ef61" +checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" dependencies = [ "addr2line", - "cc", "cfg-if", "libc", "miniz_oxide", "object", "rustc-demangle", + "windows-link", ] [[package]] @@ -74,16 +124,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] -name = "bstr" -version = "0.2.17" +name = "bitflags" +version = "2.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" -dependencies = [ - "lazy_static", - "memchr", - "regex-automata", - "serde", -] +checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "catcsv" @@ -92,7 +142,7 @@ dependencies = [ "cli_test_dir", "csv", "docopt", - "env_logger", + "env_logger 0.9.3", "error-chain", "log", "serde", @@ -100,17 +150,11 @@ dependencies = [ "walkdir", ] -[[package]] -name = "cc" -version = "1.0.73" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" - [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" [[package]] name = "clap" @@ -120,7 +164,7 @@ checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" dependencies = [ "ansi_term", "atty", - "bitflags", + "bitflags 1.3.2", "strsim 0.8.0", "term_size", "textwrap", @@ -128,19 +172,65 @@ dependencies = [ "vec_map", ] +[[package]] +name = "clap" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4512b90fa68d3a9932cea5184017c5d200f5921df706d45e853537dea51508f" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0025e98baa12e766c67ba13ff4695a887a1eba19569aad00a472546795bd6730" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim 0.11.1", + "terminal_size", +] + +[[package]] +name = "clap_derive" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "clap_lex" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" + [[package]] name = "cli_test_dir" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bc63338a59538d4f4b767dfb6082e4d26736aadb5100894b76039a04d6ad519" +checksum = "cc7e8a289c9ba144ed96a2f5776b320192ccb2f22212b9aabf61fd15dedb4b3a" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "csv" -version = "1.1.6" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" dependencies = [ - "bstr", "csv-core", "itoa", "ryu", @@ -149,13 +239,22 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.10" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" dependencies = [ "memchr", ] +[[package]] +name = "deranged" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a41953f86f8a05768a6cda24def994fd2f424b04ec5c719cf89989779f199071" +dependencies = [ + "powerfmt", +] + [[package]] name = "docopt" version = "1.1.1" @@ -168,11 +267,21 @@ dependencies = [ "strsim 0.10.0", ] +[[package]] +name = "env_filter" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" +dependencies = [ + "log", + "regex", +] + [[package]] name = "env_logger" -version = "0.9.0" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3" +checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" dependencies = [ "atty", "humantime", @@ -181,6 +290,29 @@ dependencies = [ "termcolor", ] +[[package]] +name = "env_logger" +version = "0.11.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "jiff", + "log", +] + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "error-chain" version = "0.12.4" @@ -209,7 +341,7 @@ checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "synstructure", ] @@ -218,9 +350,9 @@ name = "fixed2csv" version = "1.0.0" dependencies = [ "csv", - "env_logger", + "env_logger 0.9.3", "failure", - "humansize", + "humansize 1.1.1", "humantime", "log", "structopt", @@ -233,7 +365,7 @@ dependencies = [ "cli_test_dir", "csv", "docopt", - "env_logger", + "env_logger 0.9.3", "error-chain", "lazy_static", "log", @@ -246,19 +378,19 @@ dependencies = [ [[package]] name = "gimli" -version = "0.26.1" +version = "0.32.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78cc372d058dcf6d5ecd98510e7fbc9e5aec4d21de70f65fea8fecebcd881bd4" +checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" [[package]] name = "hashcsv" version = "1.0.1" dependencies = [ "anyhow", - "clap", + "clap 2.34.0", "cli_test_dir", "csv", - "env_logger", + "env_logger 0.9.3", "log", "regex", "serde", @@ -276,6 +408,12 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hermit-abi" version = "0.1.19" @@ -291,72 +429,160 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02296996cb8796d7c6e3bc2d9211b7802812d36999a51bb754123ead7d37d026" +[[package]] +name = "humansize" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cb51c9a029ddc91b07a787f1d86b53ccfa49b0e86688c946ebe8d3555685dd7" +dependencies = [ + "libm", +] + [[package]] name = "humantime" -version = "2.1.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "itoa" -version = "0.4.8" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "jiff" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" +checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "js-sys" +version = "0.3.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" +dependencies = [ + "once_cell", + "wasm-bindgen", +] [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.126" +version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "libm" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "log" -version = "0.4.17" +version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" [[package]] name = "memchr" -version = "2.5.0" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "miniz_oxide" -version = "0.5.1" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b29bd4bc3f33391105ebee3589c19197c4271e3e5a9ec9bfe8127eeff8f082" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ - "adler", + "adler2", ] [[package]] -name = "num_threads" -version = "0.1.6" +name = "num-conv" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" -dependencies = [ - "libc", -] +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" [[package]] name = "object" -version = "0.28.4" +version = "0.37.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e42c982f2d955fac81dd7e1d0e1426a7d702acd9c98d19ab01083a6a0328c424" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" dependencies = [ "memchr", ] +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -366,7 +592,7 @@ dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", - "syn", + "syn 1.0.109", "version_check", ] @@ -383,56 +609,81 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.39" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.18" +version = "1.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" dependencies = [ "proc-macro2", ] [[package]] name = "regex" -version = "1.5.6" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", + "regex-automata", "regex-syntax", ] [[package]] name = "regex-automata" -version = "0.1.10" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] [[package]] name = "regex-syntax" -version = "0.6.26" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "rustc-demangle" -version = "0.1.21" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" + +[[package]] +name = "rustix" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +dependencies = [ + "bitflags 2.9.4", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustversion" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.10" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "same-file" @@ -445,52 +696,75 @@ dependencies = [ [[package]] name = "scrubcsv" -version = "1.0.0" +version = "1.1.0" dependencies = [ - "clap", + "clap 4.5.49", "cli_test_dir", "csv", - "env_logger", - "humansize", + "env_logger 0.11.8", + "humansize 2.1.3", "lazy_static", "libc", "log", "regex", "serde", - "structopt", + "serde_json", ] [[package]] name = "serde" -version = "1.0.137" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.137" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.106", +] + +[[package]] +name = "serde_json" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", + "serde_core", ] [[package]] name = "sha1_smol" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012" +checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d" [[package]] name = "snap" -version = "1.0.5" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "strsim" @@ -504,13 +778,19 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "structopt" version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" dependencies = [ - "clap", + "clap 2.34.0", "lazy_static", "structopt-derive", ] @@ -521,18 +801,29 @@ version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" dependencies = [ - "heck", + "heck 0.3.3", "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] name = "syn" -version = "1.0.95" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbaf6116ab8924f39d52792136fb74fd60a80194cf1b1c6ffa6453eef1c3f942" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" dependencies = [ "proc-macro2", "quote", @@ -547,7 +838,7 @@ checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "unicode-xid", ] @@ -563,13 +854,23 @@ dependencies = [ [[package]] name = "termcolor" -version = "1.1.3" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" dependencies = [ "winapi-util", ] +[[package]] +name = "terminal_size" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b8cb979cb11c32ce1603f8137b22262a9d131aaa5c37b5678025f22b8becd0" +dependencies = [ + "rustix", + "windows-sys 0.60.2", +] + [[package]] name = "textwrap" version = "0.11.0" @@ -582,45 +883,62 @@ dependencies = [ [[package]] name = "time" -version = "0.3.9" +version = "0.3.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2702e08a7a860f005826c6815dcac101b19b5eb330c27fe4a5928fec1d20ddd" +checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" dependencies = [ - "libc", - "num_threads", + "deranged", + "num-conv", + "powerfmt", + "serde", + "time-core", ] +[[package]] +name = "time-core" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" + [[package]] name = "unicode-ident" -version = "1.0.0" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" +checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" [[package]] name = "unicode-segmentation" -version = "1.9.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.1.9" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "unicode-xid" -version = "0.2.3" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "957e51f3646910546462e67d5f7599b9e4fb8acdd304b087a6494730f9eebf04" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.1.0" +version = "1.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93bbc61e655a4833cf400d0d15bf3649313422fa7572886ad6dab16d79886365" +checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" dependencies = [ + "js-sys", "sha1_smol", + "wasm-bindgen", ] [[package]] @@ -631,21 +949,79 @@ checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "walkdir" -version = "2.3.2" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", - "winapi", "winapi-util", ] +[[package]] +name = "wasm-bindgen" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn 2.0.106", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" +dependencies = [ + "unicode-ident", +] + [[package]] name = "winapi" version = "0.3.9" @@ -664,11 +1040,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.5" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "winapi", + "windows-sys 0.61.2", ] [[package]] @@ -676,3 +1052,92 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" diff --git a/scrubcsv/CHANGELOG.md b/scrubcsv/CHANGELOG.md index 983161a..1345320 100644 --- a/scrubcsv/CHANGELOG.md +++ b/scrubcsv/CHANGELOG.md @@ -5,6 +5,19 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.1.0] - 2025-10-14 + +### Added + +- `--output-stats-to-file ` option to write processing statistics to a JSON file. The output includes rows, bad_rows, elapsed_seconds, bytes_processed, and bytes_per_second. + +### Changed + +- Migrated from deprecated `structopt` to `clap` v4.5 with derive macros. +- Updated `env_logger` from 0.9 to 0.11. +- Updated `humansize` from 1.0 to 2.1. +- Updated all other dependencies to their latest compatible versions. + ## [1.0.0] - 2022-05-25 ### Added diff --git a/scrubcsv/Cargo.toml b/scrubcsv/Cargo.toml index 909a531..49dc61d 100644 --- a/scrubcsv/Cargo.toml +++ b/scrubcsv/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "scrubcsv" -version = "1.0.0" -authors = ["Eric Kidd "] +version = "1.1.0" +authors = ["Eric Kidd ", "Seamus Abshere "] edition = "2018" description = "Remove bad lines from large CSV files and normalize the rest" @@ -11,16 +11,16 @@ repository = "https://github.com/faradayio/csv-tools" homepage = "https://github.com/faradayio/csv-tools/tree/main/scrubcsv" [dependencies] -clap = { version = "2.33.0", features = ["wrap_help"] } +clap = { version = "4.5", features = ["derive", "wrap_help"] } csv = "1" -env_logger = "0.9.0" -humansize = "1.0.1" -lazy_static = "1.2.0" -libc = "0.2.18" +env_logger = "0.11" +humansize = "2.1" +lazy_static = "1.5" +libc = "0.2" log = "0.4" regex = "1" serde = "1.0" -structopt = "0.3.3" +serde_json = "1.0" [dev-dependencies] cli_test_dir = "0.1.1" diff --git a/scrubcsv/src/errors.rs b/scrubcsv/src/errors.rs index bf9a3c2..952f385 100644 --- a/scrubcsv/src/errors.rs +++ b/scrubcsv/src/errors.rs @@ -9,7 +9,7 @@ use std::{error, fmt, result}; /// Our error type. We used a boxed dynamic error because we don't care much /// about the details and we're only going to print it for the user anyways. -pub type Error = Box; +pub type Error = Box; /// Our custom `Result` type. Defaults the `E` parameter to our error type. pub type Result = result::Result; @@ -51,7 +51,7 @@ pub trait ResultExt: Sized { F: FnOnce(&E) -> C; } -impl ResultExt for Result { +impl ResultExt for Result { fn with_context(self, build_context: F) -> Result where C: Into, diff --git a/scrubcsv/src/main.rs b/scrubcsv/src/main.rs index 4bfe58a..1e783d4 100644 --- a/scrubcsv/src/main.rs +++ b/scrubcsv/src/main.rs @@ -2,11 +2,13 @@ #![forbid(unsafe_code)] // Import from other crates. +use clap::Parser; use csv::ByteRecord; -use humansize::{file_size_opts, FileSize}; +use humansize::{format_size, BINARY}; use lazy_static::lazy_static; use log::debug; use regex::{bytes::Regex as BytesRegex, Regex}; +use serde_json::json; use std::{ borrow::Cow, fs, @@ -15,7 +17,6 @@ use std::{ process, time::Instant, }; -use structopt::StructOpt; // Modules defined in separate files. mod clean_column_names; @@ -34,9 +35,10 @@ use crate::util::CharSpecifier; const BUFFER_SIZE: usize = 256 * 1024; /// Our command-line arguments. -#[derive(Debug, StructOpt)] -#[structopt( +#[derive(Debug, Parser)] +#[command( name = "scrubcsv", + version, about = "Clean and normalize a CSV file.", after_help = "Read a CSV file, normalize the \"good\" lines, and print them to standard output. Discard any lines with the wrong number of columns. @@ -58,9 +60,9 @@ struct Opt { /// Character used to separate fields in a row (must be a single ASCII /// byte, or "tab"). - #[structopt( + #[arg( value_name = "CHAR", - short = "d", + short = 'd', long = "delimiter", default_value = "," )] @@ -68,45 +70,49 @@ struct Opt { /// Convert values matching NULL_REGEX to an empty string. For a case-insensitive /// match, use `(?i)`: `--null '(?i)NULL'`. - #[structopt(value_name = "NULL_REGEX", short = "n", long = "null")] + #[arg(value_name = "NULL_REGEX", short = 'n', long = "null")] null: Option, /// Replace LF and CRLF sequences in values with spaces. This should improve /// compatibility with systems like BigQuery that don't expect newlines /// inside escaped strings. - #[structopt(long = "replace-newlines")] + #[arg(long = "replace-newlines")] replace_newlines: bool, /// Remove whitespace at beginning and end of each cell. - #[structopt(long = "trim-whitespace")] + #[arg(long = "trim-whitespace")] trim_whitespace: bool, /// Make sure column names are unique, and use only lowercase letters, /// numbers and underscores. "unique" (the default) will assign number /// prefixes to make names unique. "stable" will use a simple, predictable /// mapping, and fail with an error if the resulting names are not unique. - #[structopt(value_name = "CLEANER_TYPE", long = "clean-column-names")] + #[arg(value_name = "CLEANER_TYPE", long = "clean-column-names")] clean_column_names: Option>, /// Fail if the output CSV file would contain any column names matching the /// specified regular expression. - #[structopt(long = "reserve-column-names")] + #[arg(long = "reserve-column-names")] reserve_column_names: Option, /// Drop any rows where the specified column is empty or NULL. Can be passed /// more than once. Useful for cleaning primary key columns before /// upserting. Uses the cleaned form of column names. - #[structopt(value_name = "COL", long = "drop-row-if-null")] + #[arg(value_name = "COL", long = "drop-row-if-null")] drop_row_if_null: Vec, /// Do not print performance information. - #[structopt(short = "q", long = "quiet")] + #[arg(short = 'q', long = "quiet")] quiet: bool, /// Character used to quote entries. May be set to "none" to ignore all /// quoting. - #[structopt(value_name = "CHAR", long = "quote", default_value = "\"")] + #[arg(value_name = "CHAR", long = "quote", default_value = "\"")] quote: CharSpecifier, + + /// Output statistics to a JSON file at the specified path. + #[arg(value_name = "PATH", long = "output-stats-to-file")] + output_stats_to_file: Option, } impl Opt { @@ -134,8 +140,8 @@ fn run() -> Result<()> { // Set up logging. env_logger::init(); - // Parse our command-line arguments using `docopt`. - let opt: Opt = Opt::from_args(); + // Parse our command-line arguments using `clap`. + let opt: Opt = Opt::parse(); debug!("Options: {:#?}", opt); // Remember the time we started. @@ -348,19 +354,41 @@ fn run() -> Result<()> { // Flush all our buffers. wtr.flush().context("error writing records")?; + // Calculate statistics. + let ellapsed = start_time.elapsed().as_secs_f64(); + let bytes_processed = rdr.position().byte(); + let bytes_per_second = (bytes_processed as f64 / ellapsed) as i64; + // Print out some information about our run. if !opt.quiet { - let ellapsed = start_time.elapsed().as_secs_f64(); - let bytes_per_second = (rdr.position().byte() as f64 / ellapsed) as i64; eprintln!( "{} rows ({} bad) in {:.2} seconds, {}/sec", rows, bad_rows, ellapsed, - bytes_per_second.file_size(file_size_opts::BINARY)?, + format_size(bytes_per_second as u64, BINARY), ); } + // Output statistics to file if requested. + if let Some(stats_path) = &opt.output_stats_to_file { + let stats = json!({ + "rows": rows, + "bad_rows": bad_rows, + "elapsed_seconds": ellapsed, + "bytes_processed": bytes_processed, + "bytes_per_second": bytes_per_second, + }); + fs::write( + stats_path, + serde_json::to_string_pretty(&stats) + .context("failed to serialize stats")?, + ) + .with_context(|_| { + format!("failed to write stats to {}", stats_path.display()) + })?; + } + // If more than 10% of rows are bad, assume something has gone horribly // wrong. if bad_rows.checked_mul(10).expect("multiplication overflow") > rows { diff --git a/scrubcsv/src/util.rs b/scrubcsv/src/util.rs index d0e7227..8117af9 100644 --- a/scrubcsv/src/util.rs +++ b/scrubcsv/src/util.rs @@ -6,7 +6,7 @@ use crate::errors::*; /// Specifies an optional single-byte character used to configure our CSV /// parser. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct CharSpecifier(Option); impl CharSpecifier { diff --git a/scrubcsv/tests/tests.rs b/scrubcsv/tests/tests.rs index 05cd114..c650386 100644 --- a/scrubcsv/tests/tests.rs +++ b/scrubcsv/tests/tests.rs @@ -1,6 +1,7 @@ //! Integration tests for our CLI. use cli_test_dir::*; +use serde_json::Value; #[test] fn help_flag() { @@ -256,3 +257,46 @@ a,b,c "# ); } + +#[test] +fn output_stats_to_file() { + let testdir = TestDir::new("scrubcsv", "output_stats_to_file"); + testdir.create_file( + "in.csv", + "\ +a,b,c +1,2,3 +4,5,6 +7,8,9 +", + ); + let stats_file = testdir.path("stats.json"); + let output = testdir + .cmd() + .arg("--output-stats-to-file") + .arg(&stats_file) + .arg("in.csv") + .expect_success(); + + assert_eq!( + output.stdout_str(), + "\ +a,b,c +1,2,3 +4,5,6 +7,8,9 +" + ); + + let stats_content = + std::fs::read_to_string(&stats_file).expect("failed to read stats file"); + let stats: Value = + serde_json::from_str(&stats_content).expect("failed to parse stats JSON"); + + assert_eq!(stats["rows"], 4); + assert_eq!(stats["bad_rows"], 0); + assert!(stats["elapsed_seconds"].is_number()); + assert!(stats["bytes_processed"].is_number()); + assert!(stats["bytes_per_second"].is_number()); + assert!(stats["bytes_processed"].as_u64().unwrap() > 0); +} From ebae7b8c4c86b3a636344f0ef59c2ffde554d558 Mon Sep 17 00:00:00 2001 From: Seamus Abshere Date: Tue, 14 Oct 2025 15:27:44 -0400 Subject: [PATCH 2/4] udpates --- Cargo.lock | 298 +++-------------------------------------- build-release | 2 +- catcsv/CHANGELOG.md | 6 + catcsv/Cargo.toml | 4 +- deny.toml | 14 +- fixed2csv/CHANGELOG.md | 9 ++ fixed2csv/Cargo.toml | 10 +- fixed2csv/src/main.rs | 22 ++- geochunk/CHANGELOG.md | 6 + geochunk/Cargo.toml | 4 +- hashcsv/CHANGELOG.md | 8 ++ hashcsv/Cargo.toml | 7 +- hashcsv/src/main.rs | 15 ++- 13 files changed, 85 insertions(+), 320 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 20f0d27..4935224 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,15 +26,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "ansi_term" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" -dependencies = [ - "winapi", -] - [[package]] name = "anstream" version = "0.6.21" @@ -91,17 +82,6 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi", - "libc", - "winapi", -] - [[package]] name = "backtrace" version = "0.3.76" @@ -117,12 +97,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - [[package]] name = "bitflags" version = "2.9.4" @@ -137,12 +111,12 @@ checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "catcsv" -version = "1.0.0" +version = "1.0.1" dependencies = [ "cli_test_dir", "csv", "docopt", - "env_logger 0.9.3", + "env_logger", "error-chain", "log", "serde", @@ -156,22 +130,6 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" -[[package]] -name = "clap" -version = "2.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" -dependencies = [ - "ansi_term", - "atty", - "bitflags 1.3.2", - "strsim 0.8.0", - "term_size", - "textwrap", - "unicode-width", - "vec_map", -] - [[package]] name = "clap" version = "4.5.49" @@ -201,10 +159,10 @@ version = "4.5.49" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", - "syn 2.0.106", + "syn", ] [[package]] @@ -277,19 +235,6 @@ dependencies = [ "regex", ] -[[package]] -name = "env_logger" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" -dependencies = [ - "atty", - "humantime", - "log", - "regex", - "termcolor", -] - [[package]] name = "env_logger" version = "0.11.8" @@ -323,49 +268,27 @@ dependencies = [ "version_check", ] -[[package]] -name = "failure" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d32e9bd16cc02eae7db7ef620b392808b89f6a5e16bb3497d159c6b92a0f4f86" -dependencies = [ - "backtrace", - "failure_derive", -] - -[[package]] -name = "failure_derive" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", - "synstructure", -] - [[package]] name = "fixed2csv" -version = "1.0.0" +version = "1.0.1" dependencies = [ + "anyhow", + "clap", "csv", - "env_logger 0.9.3", - "failure", - "humansize 1.1.1", + "env_logger", + "humansize", "humantime", "log", - "structopt", ] [[package]] name = "geochunk" -version = "1.0.0" +version = "1.0.1" dependencies = [ "cli_test_dir", "csv", "docopt", - "env_logger 0.9.3", + "env_logger", "error-chain", "lazy_static", "log", @@ -384,51 +307,26 @@ checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" [[package]] name = "hashcsv" -version = "1.0.1" +version = "1.0.2" dependencies = [ "anyhow", - "clap 2.34.0", + "clap", "cli_test_dir", "csv", - "env_logger 0.9.3", + "env_logger", "log", "regex", "serde", - "structopt", "time", "uuid", ] -[[package]] -name = "heck" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" -dependencies = [ - "unicode-segmentation", -] - [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "humansize" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02296996cb8796d7c6e3bc2d9211b7802812d36999a51bb754123ead7d37d026" - [[package]] name = "humansize" version = "2.1.3" @@ -477,7 +375,7 @@ checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn", ] [[package]] @@ -583,30 +481,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" -[[package]] -name = "proc-macro-error" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn 1.0.109", - "version_check", -] - -[[package]] -name = "proc-macro-error-attr" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" -dependencies = [ - "proc-macro2", - "quote", - "version_check", -] - [[package]] name = "proc-macro2" version = "1.0.101" @@ -666,7 +540,7 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ - "bitflags 2.9.4", + "bitflags", "errno", "libc", "linux-raw-sys", @@ -698,11 +572,11 @@ dependencies = [ name = "scrubcsv" version = "1.1.0" dependencies = [ - "clap 4.5.49", + "clap", "cli_test_dir", "csv", - "env_logger 0.11.8", - "humansize 2.1.3", + "env_logger", + "humansize", "lazy_static", "libc", "log", @@ -738,7 +612,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn", ] [[package]] @@ -766,12 +640,6 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" -[[package]] -name = "strsim" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" - [[package]] name = "strsim" version = "0.10.0" @@ -784,41 +652,6 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" -[[package]] -name = "structopt" -version = "0.3.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" -dependencies = [ - "clap 2.34.0", - "lazy_static", - "structopt-derive", -] - -[[package]] -name = "structopt-derive" -version = "0.4.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" -dependencies = [ - "heck 0.3.3", - "proc-macro-error", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - [[package]] name = "syn" version = "2.0.106" @@ -830,37 +663,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "synstructure" -version = "0.12.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", - "unicode-xid", -] - -[[package]] -name = "term_size" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e4129646ca0ed8f45d09b929036bafad5377103edd06e50bf574b353d2b08d9" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "termcolor" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" -dependencies = [ - "winapi-util", -] - [[package]] name = "terminal_size" version = "0.4.3" @@ -871,16 +673,6 @@ dependencies = [ "windows-sys 0.60.2", ] -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "term_size", - "unicode-width", -] - [[package]] name = "time" version = "0.3.44" @@ -906,24 +698,6 @@ version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" -[[package]] -name = "unicode-segmentation" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" - -[[package]] -name = "unicode-width" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" - -[[package]] -name = "unicode-xid" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" - [[package]] name = "utf8parse" version = "0.2.2" @@ -941,12 +715,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" - [[package]] name = "version_check" version = "0.9.5" @@ -986,7 +754,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.106", + "syn", "wasm-bindgen-shared", ] @@ -1008,7 +776,7 @@ checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -1022,22 +790,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - [[package]] name = "winapi-util" version = "0.1.11" @@ -1047,12 +799,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-link" version = "0.2.1" diff --git a/build-release b/build-release index bc8d80d..1825e12 100755 --- a/build-release +++ b/build-release @@ -23,7 +23,7 @@ VERSION="$RELEASE_VERSION" # Install 'cargo deny'. echo "Installing cargo-deny" -cargo_deny_version=0.12.1 +cargo_deny_version=0.18.5 cargo_deny_basename=cargo-deny-$cargo_deny_version-$HOST curl -fLO https://github.com/EmbarkStudios/cargo-deny/releases/download/$cargo_deny_version/$cargo_deny_basename.tar.gz tar xf $cargo_deny_basename.tar.gz diff --git a/catcsv/CHANGELOG.md b/catcsv/CHANGELOG.md index 909bd32..897037d 100644 --- a/catcsv/CHANGELOG.md +++ b/catcsv/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.0.1] - 2025-10-14 + +### Changed + +- Updated `env_logger` from 0.9 to 0.11 + ## [1.0.0] - 2022-05-25 ### Added diff --git a/catcsv/Cargo.toml b/catcsv/Cargo.toml index eb41719..25b0b54 100644 --- a/catcsv/Cargo.toml +++ b/catcsv/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "catcsv" -version = "1.0.0" +version = "1.0.1" authors = ["Eric Kidd "] edition = "2018" @@ -16,7 +16,7 @@ cli_test_dir = "0.1.2" [dependencies] csv = "1" docopt = "1" -env_logger = "0.9.0" +env_logger = "0.11" error-chain = "0.12.4" log = "0.4.14" serde = "1.0.123" diff --git a/deny.toml b/deny.toml index baaa035..01d9dd4 100644 --- a/deny.toml +++ b/deny.toml @@ -3,24 +3,18 @@ # These policies can be enforced using `cargo deny check`. [licenses] -# Don't allow code with an unclear license. -unlicensed = "deny" - -# Don't allow "copylefted" licenses unless they're listed below. -copyleft = "deny" +# In cargo-deny 0.16.0+, all licenses are denied by default unless explicitly +# allowed. This means unlicensed code, copyleft licenses, and AGPL are +# automatically denied since they're not in the allow list. # Allow common non-restrictive licenses. -allow = ["MIT", "Apache-2.0", "BSD-3-Clause", "CC0-1.0"] +allow = ["MIT", "Apache-2.0", "BSD-3-Clause", "CC0-1.0", "Unicode-3.0"] # Also fine to allow. ISC is used for various DNS and crypto things, and it's a # minimally restrictive open source license. # # "BSD-2-Clause", "ISC", "OpenSSL", "Zlib" -# Many organizations ban AGPL-licensed code -# https://opensource.google/docs/using/agpl-policy/ -deny = ["AGPL-3.0"] - [bans] # Do we want to know about multiple versions of the same dependency? multiple-versions = "allow" diff --git a/fixed2csv/CHANGELOG.md b/fixed2csv/CHANGELOG.md index df03b73..739f53d 100644 --- a/fixed2csv/CHANGELOG.md +++ b/fixed2csv/CHANGELOG.md @@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.0.1] - 2025-10-14 + +### Changed + +- Updated `env_logger` from 0.9 to 0.11 +- Updated `humansize` from 1 to 2 +- Migrated from `structopt` to `clap` 4 derive +- Replaced unmaintained `failure` crate with `anyhow` + ## [1.0.0] - 2022-05-25 ### Added diff --git a/fixed2csv/Cargo.toml b/fixed2csv/Cargo.toml index 5c4fee7..10ef1be 100644 --- a/fixed2csv/Cargo.toml +++ b/fixed2csv/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fixed2csv" -version = "1.0.0" +version = "1.0.1" authors = ["Eric Kidd "] edition = "2018" @@ -11,10 +11,10 @@ repository = "https://github.com/faradayio/csv-tools" homepage = "https://github.com/faradayio/csv-tools/tree/main/fixed2csv" [dependencies] +anyhow = "1" +clap = { version = "4", features = ["derive"] } csv = "1.0.2" -env_logger = "0.9.0" -failure = "0.1.3" -humansize = "1" +env_logger = "0.11" +humansize = "2" humantime = "2" log = "0.4.6" -structopt = "0.3.21" diff --git a/fixed2csv/src/main.rs b/fixed2csv/src/main.rs index 7376222..be13098 100644 --- a/fixed2csv/src/main.rs +++ b/fixed2csv/src/main.rs @@ -1,5 +1,6 @@ -use failure::Error; -use humansize::{file_size_opts, FileSize}; +use anyhow::Error; +use clap::Parser; +use humansize::{format_size, BINARY}; use humantime::format_duration; use log::debug; use std::{ @@ -7,13 +8,12 @@ use std::{ io::{prelude::*, stdin, stdout, BufReader}, time::{Duration, SystemTime}, }; -use structopt::StructOpt; -#[derive(Debug, StructOpt)] -/// Convert fixed-width fields on stdin to CSV data on stdout. +#[derive(Debug, Parser)] +#[command(about, version)] struct Opt { /// Print summary statistics. - #[structopt(short = "v", long = "verbose")] + #[arg(short = 'v', long = "verbose")] verbose: bool, /// One of more field widths, as separate command-line arguments. @@ -22,7 +22,7 @@ struct Opt { /// Our main entry point. fn main() -> Result<(), Error> { - let opt = Opt::from_args(); + let opt = Opt::parse(); debug!("Options: {:?}", opt); // Keep track of how much time this takes. @@ -42,13 +42,9 @@ fn main() -> Result<(), Error> { }; eprintln!( "Processed {} in {}, {}/s", - total - .file_size(file_size_opts::BINARY) - .expect("size can never be negative"), + format_size(total, BINARY), format_duration(simple_elapsed), - (total as u64 / elapsed.as_secs()) - .file_size(file_size_opts::BINARY) - .expect("size can never be negative"), + format_size(total as u64 / elapsed.as_secs(), BINARY), ); } diff --git a/geochunk/CHANGELOG.md b/geochunk/CHANGELOG.md index 11f791d..0e6a866 100644 --- a/geochunk/CHANGELOG.md +++ b/geochunk/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.0.1] - 2025-10-14 + +### Changed + +- Updated `env_logger` from 0.9 to 0.11 + ## [1.0.0] - 2022-05-25 ### Added diff --git a/geochunk/Cargo.toml b/geochunk/Cargo.toml index f0274db..43240e1 100644 --- a/geochunk/Cargo.toml +++ b/geochunk/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "geochunk" -version = "1.0.0" +version = "1.0.1" authors = ["Eric Kidd "] edition = "2018" @@ -16,7 +16,7 @@ cli_test_dir = "0.1.2" [dependencies] csv = "1" docopt = "1.1" -env_logger = "0.9.0" +env_logger = "0.11" error-chain = "0.12.4" lazy_static = "1.4" log = "0.4.14" diff --git a/hashcsv/CHANGELOG.md b/hashcsv/CHANGELOG.md index f5430ad..f999349 100644 --- a/hashcsv/CHANGELOG.md +++ b/hashcsv/CHANGELOG.md @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.0.2] - 2025-10-14 + +### Changed + +- Updated `env_logger` from 0.9 to 0.11 +- Migrated from `structopt` to `clap` 4 derive +- Updated `clap` from 2.33 to 4 + ## [1.0.1] - 2022-05-25 ### Added diff --git a/hashcsv/Cargo.toml b/hashcsv/Cargo.toml index 385634b..4e01797 100644 --- a/hashcsv/Cargo.toml +++ b/hashcsv/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "hashcsv" -version = "1.0.1" +version = "1.0.2" authors = ["Eric Kidd "] edition = "2018" description = "Append an `id` column to each row of a CSV file, containing a UUID v5 hash of the row" @@ -12,13 +12,12 @@ homepage = "https://github.com/faradayio/csv-tools/blob/main/hashcsv/README.md" [dependencies] anyhow = "1.0.38" -clap = { version = "2.33.3", features = ["wrap_help"] } +clap = { version = "4", features = ["derive", "wrap_help"] } csv = "1.1.5" -env_logger = "0.9.0" +env_logger = "0.11" log = "0.4.14" regex = "1.4.3" serde = "1.0.123" -structopt = "0.3.21" time = "0.3.9" uuid = { version = "1.0.0", features = ["v5"] } diff --git a/hashcsv/src/main.rs b/hashcsv/src/main.rs index b9f4921..5862223 100644 --- a/hashcsv/src/main.rs +++ b/hashcsv/src/main.rs @@ -1,4 +1,5 @@ use anyhow::{Context, Result}; +use clap::Parser; use csv::ByteRecord; use log::debug; use std::{ @@ -7,7 +8,6 @@ use std::{ path::PathBuf, process, }; -use structopt::StructOpt; use uuid::Uuid; /// Use reasonably large input and output buffers. In other CSV tools, this @@ -16,16 +16,17 @@ use uuid::Uuid; const BUFFER_SIZE: usize = 256 * 1024; /// Command-line options. -#[derive(Debug, StructOpt)] -#[structopt( - about = "Add an `id` column to a CSV file based on a hash of the other columns" +#[derive(Debug, Parser)] +#[command( + about = "Add an `id` column to a CSV file based on a hash of the other columns", + version )] struct Opt { /// Input file (uses stdin if omitted). input: Option, /// The column name for the new, hash-based ID column. - #[structopt(long = "id-column-name", short = "c", default_value = "id")] + #[arg(long = "id-column-name", short = 'c', default_value = "id")] id_column_name: String, } @@ -34,8 +35,8 @@ fn main() { // Set up logging. env_logger::init(); - // Parse our command-line arguments using `docopt`. - let opt: Opt = Opt::from_args(); + // Parse our command-line arguments. + let opt: Opt = Opt::parse(); debug!("Options: {:#?}", opt); if let Err(err) = run(&opt) { From 0acac452f1fc5865508076bea00702c96b491022 Mon Sep 17 00:00:00 2001 From: Seamus Abshere Date: Tue, 14 Oct 2025 15:33:17 -0400 Subject: [PATCH 3/4] stuff --- build-release | 2 +- catcsv/src/main.rs | 9 +++++---- geochunk/src/errors.rs | 1 + scrubcsv/src/util.rs | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/build-release b/build-release index 1825e12..71608af 100755 --- a/build-release +++ b/build-release @@ -40,7 +40,7 @@ cargo test --package "$PKG_NAME" # Install cross if needed. if [ "$CARGO_COMMAND" = "cross" ]; then echo "Installing cross for cross-compilation" - cross_version=v0.2.1 + cross_version=v0.2.5 cross_basename=cross-$cross_version-$HOST curl -fLO https://github.com/rust-embedded/cross/releases/download/$cross_version/$cross_basename.tar.gz tar xf $cross_basename.tar.gz diff --git a/catcsv/src/main.rs b/catcsv/src/main.rs index bc7ea78..9e9154f 100644 --- a/catcsv/src/main.rs +++ b/catcsv/src/main.rs @@ -20,6 +20,7 @@ use walkdir::WalkDir; use crate::errors::*; /// A module to hold `Error`, etc., types generated by `error-chain`. +#[allow(unexpected_cfgs)] mod errors { use error_chain::error_chain; use std::io; @@ -111,15 +112,15 @@ fn run() -> Result<()> { // Check the filename to see if we can handle this file type. if filename.ends_with(".csv") { debug!("Processing as *.csv"); - let mut file = File::open(path).chain_err(&mkerr)?; + let mut file = File::open(path).chain_err(mkerr)?; output_csv(&mut file, &mut first_headers, &mut out) - .chain_err(&mkerr)?; + .chain_err(mkerr)?; } else if filename.ends_with(".csv.sz") { debug!("Processing as *.csv.sz"); - let file = File::open(path).chain_err(&mkerr)?; + let file = File::open(path).chain_err(mkerr)?; let mut decompressed = snap::read::FrameDecoder::new(file); output_csv(&mut decompressed, &mut first_headers, &mut out) - .chain_err(&mkerr)?; + .chain_err(mkerr)?; } else { let msg = format!("{} does not appear to be a CSV file", path.display()); diff --git a/geochunk/src/errors.rs b/geochunk/src/errors.rs index c444900..2b4f39c 100644 --- a/geochunk/src/errors.rs +++ b/geochunk/src/errors.rs @@ -1,4 +1,5 @@ //! A module to hold `Error`, etc., types generated by `error-chain`. +#![allow(unexpected_cfgs)] use std::io; diff --git a/scrubcsv/src/util.rs b/scrubcsv/src/util.rs index 8117af9..9c09ae4 100644 --- a/scrubcsv/src/util.rs +++ b/scrubcsv/src/util.rs @@ -20,7 +20,7 @@ impl FromStr for CharSpecifier { type Err = Error; fn from_str(s: &str) -> Result { - if s.as_bytes().len() == 1 { + if s.len() == 1 { Ok(CharSpecifier(Some(s.as_bytes()[0]))) } else { match s { From d67260b1386ef49b7cef53e96f1dc9e64c4ace78 Mon Sep 17 00:00:00 2001 From: Seamus Abshere Date: Tue, 14 Oct 2025 15:36:24 -0400 Subject: [PATCH 4/4] fix cross --- build-release | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build-release b/build-release index 71608af..cf7cb28 100755 --- a/build-release +++ b/build-release @@ -41,8 +41,8 @@ cargo test --package "$PKG_NAME" if [ "$CARGO_COMMAND" = "cross" ]; then echo "Installing cross for cross-compilation" cross_version=v0.2.5 - cross_basename=cross-$cross_version-$HOST - curl -fLO https://github.com/rust-embedded/cross/releases/download/$cross_version/$cross_basename.tar.gz + cross_basename=cross-$HOST + curl -fLO https://github.com/cross-rs/cross/releases/download/$cross_version/$cross_basename.tar.gz tar xf $cross_basename.tar.gz mv cross /usr/local/bin/ rm -rf $cross_basename.tar.gz